{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pygoose import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/lightgbm/__init__.py:46: UserWarning: Starting from version 2.2.1, the library file in distribution wheels for macOS is built by the Apple Clang (Xcode_8.3.1) compiler.\n",
      "This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.\n",
      "Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.\n",
      "You can install the OpenMP library by the following command: ``brew install libomp``.\n",
      "  \"You can install the OpenMP library by the following command: ``brew install libomp``.\", UserWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn import model_selection\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.svm import LinearSVC \n",
    "from sklearn.svm import SVC\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier\n",
    "import lightgbm as lgb\n",
    "import xgboost as xgb\n",
    "from xgboost import XGBClassifier\n",
    "from lightgbm import LGBMClassifier\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from mlxtend.classifier import StackingCVClassifier, StackingClassifier\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Config"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Automatically discover the paths to various data folders and compose the project structure."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "project = kg.Project.discover()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_FOLDS = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "RANDOM_SEED = 42\n",
    "np.random.seed(RANDOM_SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read Data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names of the feature lists to load, in this order; commented-out entries are skipped.\n",
    "feature_lists = [\n",
    "    'simple_summaries',\n",
    "    'jaccard_ngrams',\n",
    "    'fuzzy',\n",
    "    'tfidf',\n",
    "    #'lda',\n",
    "    'nlp_tags',\n",
    "    'wordnet_similarity',\n",
    "    'phrase_embedding',\n",
    "    'wmd',\n",
    "    'wm_intersect',\n",
    "\n",
    "    # 3rdparty_* lists\n",
    "    '3rdparty_abhishek',\n",
    "    '3rdparty_dasolmar_whq',\n",
    "    '3rdparty_mephistopheies',\n",
    "    '3rdparty_image_similarity',\n",
    "\n",
    "    # magic_* lists\n",
    "    'magic_pagerank',\n",
    "    'magic_frequencies',\n",
    "    #'magic_cooccurrence_matrix',\n",
    "    'magic_cooccurrence_matrix_raw',\n",
    "\n",
    "    # oofp_nn_* lists\n",
    "    'oofp_nn_mlp_with_magic',\n",
    "    'oofp_nn_cnn_with_magic',\n",
    "    'oofp_nn_bi_lstm_with_magic',\n",
    "    'oofp_nn_siamese_lstm_attention',\n",
    "\n",
    "    'wordmatchshare',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train, df_test, feature_list_ix = project.load_feature_lists(feature_lists)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Handle the missing/inf data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "remove_invalid_feats = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_missing_cols(df):\n",
    "    \"\"\"Display the null count of every column of ``df`` that holds at least one null.\n",
    "\n",
    "    Nothing is returned; the per-column counts are rendered with ``display``.\n",
    "    \"\"\"\n",
    "    has_null = df.isnull().any()\n",
    "    cols_with_null = df.columns[has_null]\n",
    "    null_counts = df[cols_with_null].isnull().sum()\n",
    "    display(null_counts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pos_tag_cosine               33\n",
       "abh_cosine_distance        1778\n",
       "abh_jaccard_distance        522\n",
       "abh_braycurtis_distance     522\n",
       "das_word_match              437\n",
       "das_word_match_2root        437\n",
       "das_cosine                  784\n",
       "das_avg_word_len1             1\n",
       "das_avg_word_len2             2\n",
       "das_diff_avg_word             3\n",
       "dtype: int64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "find_missing_cols(df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pos_tag_cosine               736\n",
       "abh_cosine_distance        37232\n",
       "abh_jaccard_distance        1445\n",
       "abh_braycurtis_distance     1445\n",
       "das_word_match              5311\n",
       "das_word_match_2root        5311\n",
       "das_cosine                 10131\n",
       "das_avg_word_len1              2\n",
       "das_avg_word_len2              4\n",
       "das_diff_avg_word              6\n",
       "dtype: int64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "find_missing_cols(df_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "if remove_invalid_feats:\n",
    "    # Treat +/-inf as missing so one null check catches every invalid value.\n",
    "    df_train = df_train.replace([np.inf, -np.inf], np.nan)\n",
    "    df_test = df_test.replace([np.inf, -np.inf], np.nan)\n",
    "    # Drop the union of invalid columns from BOTH frames. Dropping per frame would let\n",
    "    # train and test end up with different feature sets whenever a column is invalid\n",
    "    # in only one of them, which would misalign the columns of X_train and X_test.\n",
    "    invalid_cols = df_train.columns[df_train.isnull().any()].union(\n",
    "        df_test.columns[df_test.isnull().any()])\n",
    "    # Re-assign instead of mutating in place so re-running the cell is harmless.\n",
    "    df_train = df_train.drop(invalid_cols, axis=1, errors='ignore')\n",
    "    df_test = df_test.drop(invalid_cols, axis=1, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NumPy views of the two feature frames, plus the training labels loaded from\n",
    "# 'y_train.pickle' in the project's features directory.\n",
    "X_train, X_test = df_train.values, df_test.values\n",
    "y_train = kg.io.load(project.features_dir + 'y_train.pickle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X train: (404290, 170)\n",
      "X test:  (2345796, 170)\n",
      "y train: (404290,)\n"
     ]
    }
   ],
   "source": [
    "# Shape summary of the feature matrices and the label vector.\n",
    "print('X train: {}\\nX test:  {}\\ny train: {}'.format(\n",
    "    X_train.shape, X_test.shape, y_train.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_list</th>\n",
       "      <th>start_index</th>\n",
       "      <th>end_index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>simple_summaries</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>jaccard_ngrams</td>\n",
       "      <td>9</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>fuzzy</td>\n",
       "      <td>24</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>tfidf</td>\n",
       "      <td>31</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>nlp_tags</td>\n",
       "      <td>33</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>wordnet_similarity</td>\n",
       "      <td>69</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>phrase_embedding</td>\n",
       "      <td>71</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>wmd</td>\n",
       "      <td>77</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>wm_intersect</td>\n",
       "      <td>78</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>3rdparty_abhishek</td>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>3rdparty_dasolmar_whq</td>\n",
       "      <td>96</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>3rdparty_mephistopheies</td>\n",
       "      <td>145</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>3rdparty_image_similarity</td>\n",
       "      <td>167</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>magic_pagerank</td>\n",
       "      <td>168</td>\n",
       "      <td>169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>magic_frequencies</td>\n",
       "      <td>170</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>magic_cooccurrence_matrix_raw</td>\n",
       "      <td>174</td>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>oofp_nn_mlp_with_magic</td>\n",
       "      <td>176</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>oofp_nn_cnn_with_magic</td>\n",
       "      <td>177</td>\n",
       "      <td>177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>oofp_nn_bi_lstm_with_magic</td>\n",
       "      <td>178</td>\n",
       "      <td>178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>oofp_nn_siamese_lstm_attention</td>\n",
       "      <td>179</td>\n",
       "      <td>179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>wordmatchshare</td>\n",
       "      <td>180</td>\n",
       "      <td>181</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      feature_list  start_index  end_index\n",
       "0                 simple_summaries            0          8\n",
       "1                   jaccard_ngrams            9         23\n",
       "2                            fuzzy           24         30\n",
       "3                            tfidf           31         32\n",
       "4                         nlp_tags           33         68\n",
       "5               wordnet_similarity           69         70\n",
       "6                 phrase_embedding           71         76\n",
       "7                              wmd           77         77\n",
       "8                     wm_intersect           78         79\n",
       "9                3rdparty_abhishek           80         95\n",
       "10           3rdparty_dasolmar_whq           96        144\n",
       "11         3rdparty_mephistopheies          145        166\n",
       "12       3rdparty_image_similarity          167        167\n",
       "13                  magic_pagerank          168        169\n",
       "14               magic_frequencies          170        173\n",
       "15   magic_cooccurrence_matrix_raw          174        175\n",
       "16          oofp_nn_mlp_with_magic          176        176\n",
       "17          oofp_nn_cnn_with_magic          177        177\n",
       "18      oofp_nn_bi_lstm_with_magic          178        178\n",
       "19  oofp_nn_siamese_lstm_attention          179        179\n",
       "20                  wordmatchshare          180        181"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# feature_list_ix comes from project.load_feature_lists, which ran before the invalid\n",
    "# columns were dropped, so these indices may not match the columns of X_train / X_test.\n",
    "pd.DataFrame(\n",
    "    feature_list_ix,\n",
    "    columns=['feature_list', 'start_index', 'end_index'],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature scaling (for the scale-sensitive models: logistic regression, SVM, KNN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Learn the scaling statistics from the training matrix only, then apply that same\n",
    "# fitted transform to both the training and the test matrix.\n",
    "sc = StandardScaler().fit(X_train)\n",
    "X_train_std = sc.transform(X_train)\n",
    "X_test_std = sc.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/predict with single models"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### LightGBM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LightGBM booster parameters; both seed entries reuse the notebook-wide RANDOM_SEED.\n",
    "lgb_params = {\n",
    "    'objective': 'binary',\n",
    "    'metric': 'binary_logloss',\n",
    "    'boosting': 'gbdt',\n",
    "    'device': 'cpu',\n",
    "    'feature_fraction': 0.486,\n",
    "    'num_leaves': 158,\n",
    "    'lambda_l2': 50,\n",
    "    'learning_rate': 0.01,\n",
    "    #'num_boost_round': 5000,\n",
    "    #'early_stopping_rounds': 10,\n",
    "    'verbose': 1,\n",
    "    'bagging_fraction_seed': RANDOM_SEED,\n",
    "    'feature_fraction_seed': RANDOM_SEED,\n",
    "}\n",
    "\n",
    "# Training set wrapped for LightGBM, keeping the DataFrame column names as feature names.\n",
    "train_data = lgb.Dataset(\n",
    "    data=X_train,\n",
    "    label=y_train,\n",
    "    feature_name=df_train.columns.tolist(),\n",
    ")\n",
    "\n",
    "# Cross-validation call kept for reference:\n",
    "#lgb.cv(lgb_params, train_data,  num_boost_round=5000, early_stopping_rounds=10, verbose_eval=100) ## find the best num_rounds\n",
    "\n",
    "#n_estimators = 3600"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 52min 23s, sys: 54.1 s, total: 53min 17s\n",
      "Wall time: 15min 7s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "lgb_clf = lgb.train(lgb_params, train_data,  num_boost_round=3600, verbose_eval=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1h 34min 13s, sys: 36.3 s, total: 1h 34min 49s\n",
      "Wall time: 27min 34s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "y_test_lgb = lgb_clf.predict(X_test).reshape(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# XGBoost booster parameters (binary logistic objective, log-loss evaluation metric).\n",
    "xgb_params = {\n",
    "    'max_depth': 8,\n",
    "    'nthread': 14,\n",
    "    'eta': 0.05,\n",
    "    'eval_metric': 'logloss',\n",
    "    'objective': 'binary:logistic',\n",
    "    'subsample': 0.8,\n",
    "    'colsample_bytree': 0.7,\n",
    "    'silent': 1,\n",
    "    #'seed':1123,\n",
    "    'gamma': 0.005,\n",
    "    'min_child_weight': 1,\n",
    "    'max_delta_step': 1,  ### need it for class imbalance\n",
    "}\n",
    "\n",
    "# NOTE: this rebinds `train_data`, which the LightGBM cells also use for their\n",
    "# lgb.Dataset; re-run the LightGBM parameter cell before training LightGBM again.\n",
    "train_data = xgb.DMatrix(data=X_train, label=y_train)\n",
    "#xgb.cv(xgb_params, train_data,  num_boost_round=5000, early_stopping_rounds=10, verbose_eval=10)\n",
    "\n",
    "# Early-stopping experiment on a hold-out split, kept for reference:\n",
    "# Xtrain, Xval, ytrain, yval = train_test_split(X_train, y_train, random_state=0)\n",
    "# clf_xgb = xgb.XGBClassifier(max_depth=8, learning_rate=0.05, subsample=0.8, colsample_bytree=0.7, gamma=0.005,\n",
    "#                            n_estimators=5000)\n",
    "# clf_xgb.fit(Xtrain, ytrain, early_stopping_rounds=10, eval_metric=\"logloss\",\n",
    "#         eval_set=[(Xval, yval)], verbose=True)\n",
    "\n",
    "\n",
    "### n_estimators = 640"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1h 31min 23s, sys: 6.31 s, total: 1h 31min 29s\n",
      "Wall time: 1h 31min 35s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "xgb_clf = xgb.train(xgb_params, train_data,  num_boost_round=640, verbose_eval=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3min 33s, sys: 23.9 s, total: 3min 57s\n",
      "Wall time: 4min 11s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "test_data = xgb.DMatrix(data=X_test)\n",
    "y_test_xgb = xgb_clf.predict(test_data, ntree_limit=640).reshape(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Grid search for other models (not used due to time cost)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
    "\n",
    "# computational_model_hyperparams = {\n",
    "#     #\"KNN\" : (KNeighborsClassifier(n_jobs=-1), {\"n_neighbors\":[5,10,20,80,100,150,200]}),\n",
    "#     #\"LinearSVM\" : (LinearSVC(verbose=1), {\"C\":[0.01, 0.1, 1, 10]}),\n",
    "#     \"LR\": (LogisticRegression(n_jobs=-1, solver='sag', verbose=1), {\"penalty\":[\"l1\", \"l2\"], \"C\":[0.01, 0.1, 1, 10]}), \n",
    "#     \"RbfSVM\" : (SVC(verbose=1), {'C': [0.01, 0.1, 1, 10, ], 'gamma': [\"auto\", 0.001, 0.01, 0.1, 1, 10]}),\n",
    "# }\n",
    "\n",
    "# tree_model_hyperparams = {\n",
    "# #     \"DT\" : (DecisionTreeClassifier(), {\"criterion\":['gini', \"entropy\"], \"max_depth\":[2, 5, 10, 20, 30], \"min_samples_split\":[2,5,8,10,12], \n",
    "# #                                 \"min_samples_leaf\" : [1,4,8,10,12,20]}),\n",
    "#     \"RF\" : (RandomForestClassifier(n_jobs=-1, verbose=1), {\"max_features\":['auto', 'sqrt', \"log2\"], \"max_depth\":[2, 5, 10, 20, 30], \"min_samples_split\":[2,5,8,10,12,20], \n",
    "#                                 \"min_samples_leaf\" : [1,5,10,15,20], 'n_estimators': [100, 200, 500, 800]}),\n",
    "#     \"GB\" : (GradientBoostingClassifier(verbose=1), {\"max_features\":['auto', 'sqrt', \"log2\", None], 'learning_rate':[0.01, 0.05, 0.1, 0.2], 'n_estimators':[100, 200, 500, 800], \"max_depth\":[3, 5, 7, 10],\"min_samples_split\":[2,5,8,10,12,20],\n",
    "#                                  \"subsample\" : [0.8, 0.9, 1.0], 'min_samples_leaf': [5, 10, 20, 50, 100, 200]})\n",
    "# }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# def run_params_search(X_train, y_train, model_hyperparams):\n",
    "#     model_optparams = []\n",
    "#     for model, params in model_hyperparams.items():\n",
    "#         print(model)\n",
    "#         rd_search = RandomizedSearchCV(params[0], params[1], cv=5, scoring=\"neg_log_loss\", random_state=0)\n",
    "#         rd_search.fit(X_train, y_train)\n",
    "#         model_optparams.append({\n",
    "#                         \"model_name\" : model,\n",
    "#                         \"best_model\": rd_search.best_estimator_,\n",
    "#                         \"best_paras\" : rd_search.best_params_,\n",
    "#                         \"best_score\" : rd_search.best_score_})\n",
    "#     model_optparams = pd.DataFrame(model_optparams)\n",
    "#     model_optparams.set_index(\"model_name\", inplace=True)\n",
    "#     return model_optparams"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/predict with stacking models"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define different models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "####### First-level models #########\n",
    "lr = LogisticRegression(\n",
    "    solver='sag', penalty='l2', C=1.2, max_iter=10000,\n",
    "    n_jobs=-1, random_state=RANDOM_SEED, verbose=1,\n",
    ")\n",
    "\n",
    "# RBF-kernel SVC left disabled: too slow.\n",
    "#svc = SVC(kernel='rbf', gamma='scale', probability=True, random_state=RANDOM_SEED, verbose=1)\n",
    "\n",
    "lsvm = LinearSVC(random_state=RANDOM_SEED, verbose=5, max_iter=10000)\n",
    "\n",
    "knn = KNeighborsClassifier(n_neighbors=10, n_jobs=-1)\n",
    "\n",
    "bayes = GaussianNB()\n",
    "\n",
    "adb = AdaBoostClassifier(random_state=RANDOM_SEED, learning_rate=0.1, n_estimators=500)\n",
    "\n",
    "rfc = RandomForestClassifier(\n",
    "    n_estimators=500,\n",
    "    max_features='auto',\n",
    "    max_depth=10,\n",
    "    min_samples_split=10,\n",
    "    min_samples_leaf=20,\n",
    "    random_state=RANDOM_SEED,\n",
    "    n_jobs=-1,\n",
    "    verbose=5,\n",
    ")\n",
    "\n",
    "# Same settings as the single-model LightGBM run (3600 rounds, 158 leaves, lr 0.01).\n",
    "lgbc = LGBMClassifier(\n",
    "    n_estimators=3600, num_leaves=158, learning_rate=0.01, n_jobs=-1,\n",
    "    reg_lambda=50, colsample_bytree=0.486,\n",
    "    objective='binary', metric='binary_logloss',\n",
    "    random_state=RANDOM_SEED, verbose=1,\n",
    ")\n",
    "\n",
    "# Same settings as the single-model XGBoost run (640 rounds, depth 8, lr 0.05).\n",
    "xgbc = XGBClassifier(\n",
    "    n_estimators=640, max_depth=8, n_jobs=-1, learning_rate=0.05,\n",
    "    subsample=0.8, colsample_bytree=0.7,\n",
    "    eval_metric='logloss', objective='binary:logistic',\n",
    "    gamma=0.005, max_delta_step=1,\n",
    "    random_state=RANDOM_SEED,\n",
    "    verbose=2,\n",
    ")\n",
    "\n",
    "\n",
    "####### Second-level models #########\n",
    "lr2 = LogisticRegression(random_state=RANDOM_SEED, n_jobs=-1)\n",
    "lgbc2 = LGBMClassifier(\n",
    "    objective='binary', metric='binary_logloss', n_estimators=5000,\n",
    "    random_state=RANDOM_SEED, verbose=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# lr.fit(X_train_std, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# y_test_lr = lr.predict_proba(X_test_std)[:,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### KNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4min 35s, sys: 3.78 s, total: 4min 39s\n",
      "Wall time: 4min 42s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "           metric_params=None, n_jobs=None, n_neighbors=5, p=2,\n",
       "           weights='uniform')"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# %%time\n",
    "# knn.fit(X_train_std, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Bayes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# bayes.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Adaboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# adb.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# lsvm.fit(X_train_std, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Forest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# rfc.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_test_rfc = rfc.predict_proba(X_test)[:,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### LGB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%time\n",
    "# lgbc.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_test_lgb_sk = lgbc.predict_proba(X_test)[:,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Stacking all classifiers - METHOD One: different models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack lr, rfc, lgbc and xgbc; with use_probas=True their predicted probabilities are\n",
    "# what the lgbc2 meta-classifier is trained on.\n",
    "# NOTE(review): no `cv` argument is passed, so the library default is used rather than\n",
    "# NUM_FOLDS -- confirm this is intended.\n",
    "sclf = StackingCVClassifier(\n",
    "    classifiers=[lr, rfc, lgbc, xgbc],\n",
    "    meta_classifier=lgbc2,\n",
    "    use_probas=True,\n",
    ")\n",
    "# Non-CV alternative kept for reference:\n",
    "#sclf = StackingClassifier(classifiers=[lr, rfc, lgbc, xgbc], use_probas=True, meta_classifier=lr2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "convergence after 2304 epochs took 918 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 15.3min finished\n",
      "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "convergence after 2523 epochs took 1064 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 17.7min finished\n",
      "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 1 of 500\n",
      "building tree 2 of 500\n",
      "building tree 3 of 500\n",
      "building tree 4 of 500\n",
      "building tree 5 of 500\n",
      "building tree 6 of 500\n",
      "building tree 7 of 500\n",
      "building tree 8 of 500\n",
      "building tree 9 of 500\n",
      "building tree 10 of 500\n",
      "building tree 11 of 500\n",
      "building tree 12 of 500\n",
      "building tree 13 of 500\n",
      "building tree 14 of 500\n",
      "building tree 15 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    8.2s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 16 of 500\n",
      "building tree 17 of 500\n",
      "building tree 18 of 500\n",
      "building tree 19 of 500\n",
      "building tree 20 of 500\n",
      "building tree 21 of 500\n",
      "building tree 22 of 500\n",
      "building tree 23 of 500\n",
      "building tree 24 of 500\n",
      "building tree 25 of 500\n",
      "building tree 26 of 500\n",
      "building tree 27 of 500\n",
      "building tree 28 of 500\n",
      "building tree 29 of 500\n",
      "building tree 30 of 500\n",
      "building tree 31 of 500\n",
      "building tree 32 of 500\n",
      "building tree 33 of 500\n",
      "building tree 34 of 500\n",
      "building tree 35 of 500\n",
      "building tree 36 of 500\n",
      "building tree 37 of 500\n",
      "building tree 38 of 500\n",
      "building tree 39 of 500\n",
      "building tree 40 of 500\n",
      "building tree 41 of 500\n",
      "building tree 42 of 500\n",
      "building tree 43 of 500\n",
      "building tree 44 of 500\n",
      "building tree 45 of 500\n",
      "building tree 46 of 500\n",
      "building tree 47 of 500\n",
      "building tree 48 of 500\n",
      "building tree 49 of 500\n",
      "building tree 50 of 500\n",
      "building tree 51 of 500\n",
      "building tree 52 of 500\n",
      "building tree 53 of 500\n",
      "building tree 54 of 500\n",
      "building tree 55 of 500\n",
      "building tree 56 of 500\n",
      "building tree 57 of 500\n",
      "building tree 58 of 500\n",
      "building tree 59 of 500\n",
      "building tree 60 of 500\n",
      "building tree 61 of 500\n",
      "building tree 62 of 500\n",
      "building tree 63 of 500\n",
      "building tree 64 of 500\n",
      "building tree 65 of 500\n",
      "building tree 66 of 500\n",
      "building tree 67 of 500\n",
      "building tree 68 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   46.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 69 of 500\n",
      "building tree 70 of 500\n",
      "building tree 71 of 500\n",
      "building tree 72 of 500\n",
      "building tree 73 of 500\n",
      "building tree 74 of 500\n",
      "building tree 75 of 500\n",
      "building tree 76 of 500\n",
      "building tree 77 of 500\n",
      "building tree 78 of 500\n",
      "building tree 79 of 500\n",
      "building tree 80 of 500\n",
      "building tree 81 of 500\n",
      "building tree 82 of 500\n",
      "building tree 83 of 500\n",
      "building tree 84 of 500\n",
      "building tree 85 of 500\n",
      "building tree 86 of 500\n",
      "building tree 87 of 500\n",
      "building tree 88 of 500\n",
      "building tree 89 of 500\n",
      "building tree 90 of 500\n",
      "building tree 91 of 500\n",
      "building tree 92 of 500\n",
      "building tree 93 of 500\n",
      "building tree 94 of 500\n",
      "building tree 95 of 500\n",
      "building tree 96 of 500\n",
      "building tree 97 of 500\n",
      "building tree 98 of 500\n",
      "building tree 99 of 500\n",
      "building tree 100 of 500\n",
      "building tree 101 of 500\n",
      "building tree 102 of 500\n",
      "building tree 103 of 500\n",
      "building tree 104 of 500\n",
      "building tree 105 of 500\n",
      "building tree 106 of 500\n",
      "building tree 107 of 500\n",
      "building tree 108 of 500\n",
      "building tree 109 of 500\n",
      "building tree 110 of 500\n",
      "building tree 111 of 500\n",
      "building tree 112 of 500\n",
      "building tree 113 of 500\n",
      "building tree 114 of 500\n",
      "building tree 115 of 500\n",
      "building tree 116 of 500\n",
      "building tree 117 of 500\n",
      "building tree 118 of 500\n",
      "building tree 119 of 500\n",
      "building tree 120 of 500\n",
      "building tree 121 of 500\n",
      "building tree 122 of 500\n",
      "building tree 123 of 500\n",
      "building tree 124 of 500\n",
      "building tree 125 of 500\n",
      "building tree 126 of 500\n",
      "building tree 127 of 500\n",
      "building tree 128 of 500\n",
      "building tree 129 of 500\n",
      "building tree 130 of 500\n",
      "building tree 131 of 500\n",
      "building tree 132 of 500\n",
      "building tree 133 of 500\n",
      "building tree 134 of 500\n",
      "building tree 135 of 500\n",
      "building tree 136 of 500\n",
      "building tree 137 of 500\n",
      "building tree 138 of 500\n",
      "building tree 139 of 500\n",
      "building tree 140 of 500\n",
      "building tree 141 of 500\n",
      "building tree 142 of 500\n",
      "building tree 143 of 500\n",
      "building tree 144 of 500\n",
      "building tree 145 of 500\n",
      "building tree 146 of 500\n",
      "building tree 147 of 500\n",
      "building tree 148 of 500\n",
      "building tree 149 of 500\n",
      "building tree 150 of 500\n",
      "building tree 151 of 500\n",
      "building tree 152 of 500\n",
      "building tree 153 of 500\n",
      "building tree 154 of 500\n",
      "building tree 155 of 500\n",
      "building tree 156 of 500\n",
      "building tree 157 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  1.7min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 158 of 500\n",
      "building tree 159 of 500\n",
      "building tree 160 of 500\n",
      "building tree 161 of 500\n",
      "building tree 162 of 500\n",
      "building tree 163 of 500\n",
      "building tree 164 of 500\n",
      "building tree 165 of 500\n",
      "building tree 166 of 500\n",
      "building tree 167 of 500\n",
      "building tree 168 of 500\n",
      "building tree 169 of 500\n",
      "building tree 170 of 500\n",
      "building tree 171 of 500\n",
      "building tree 172 of 500\n",
      "building tree 173 of 500\n",
      "building tree 174 of 500\n",
      "building tree 175 of 500\n",
      "building tree 176 of 500\n",
      "building tree 177 of 500\n",
      "building tree 178 of 500\n",
      "building tree 179 of 500\n",
      "building tree 180 of 500\n",
      "building tree 181 of 500\n",
      "building tree 182 of 500\n",
      "building tree 183 of 500\n",
      "building tree 184 of 500\n",
      "building tree 185 of 500\n",
      "building tree 186 of 500\n",
      "building tree 187 of 500\n",
      "building tree 188 of 500\n",
      "building tree 189 of 500\n",
      "building tree 190 of 500\n",
      "building tree 191 of 500\n",
      "building tree 192 of 500\n",
      "building tree 193 of 500\n",
      "building tree 194 of 500\n",
      "building tree 195 of 500\n",
      "building tree 196 of 500\n",
      "building tree 197 of 500\n",
      "building tree 198 of 500\n",
      "building tree 199 of 500\n",
      "building tree 200 of 500\n",
      "building tree 201 of 500\n",
      "building tree 202 of 500\n",
      "building tree 203 of 500\n",
      "building tree 204 of 500\n",
      "building tree 205 of 500\n",
      "building tree 206 of 500\n",
      "building tree 207 of 500\n",
      "building tree 208 of 500\n",
      "building tree 209 of 500\n",
      "building tree 210 of 500\n",
      "building tree 211 of 500\n",
      "building tree 212 of 500\n",
      "building tree 213 of 500\n",
      "building tree 214 of 500\n",
      "building tree 215 of 500\n",
      "building tree 216 of 500\n",
      "building tree 217 of 500\n",
      "building tree 218 of 500\n",
      "building tree 219 of 500\n",
      "building tree 220 of 500\n",
      "building tree 221 of 500\n",
      "building tree 222 of 500\n",
      "building tree 223 of 500\n",
      "building tree 224 of 500\n",
      "building tree 225 of 500\n",
      "building tree 226 of 500\n",
      "building tree 227 of 500\n",
      "building tree 228 of 500\n",
      "building tree 229 of 500\n",
      "building tree 230 of 500\n",
      "building tree 231 of 500\n",
      "building tree 232 of 500\n",
      "building tree 233 of 500\n",
      "building tree 234 of 500\n",
      "building tree 235 of 500\n",
      "building tree 236 of 500\n",
      "building tree 237 of 500\n",
      "building tree 238 of 500\n",
      "building tree 239 of 500\n",
      "building tree 240 of 500\n",
      "building tree 241 of 500\n",
      "building tree 242 of 500\n",
      "building tree 243 of 500\n",
      "building tree 244 of 500\n",
      "building tree 245 of 500\n",
      "building tree 246 of 500\n",
      "building tree 247 of 500\n",
      "building tree 248 of 500\n",
      "building tree 249 of 500\n",
      "building tree 250 of 500\n",
      "building tree 251 of 500\n",
      "building tree 252 of 500\n",
      "building tree 253 of 500\n",
      "building tree 254 of 500\n",
      "building tree 255 of 500\n",
      "building tree 256 of 500\n",
      "building tree 257 of 500\n",
      "building tree 258 of 500\n",
      "building tree 259 of 500\n",
      "building tree 260 of 500\n",
      "building tree 261 of 500\n",
      "building tree 262 of 500\n",
      "building tree 263 of 500\n",
      "building tree 264 of 500\n",
      "building tree 265 of 500\n",
      "building tree 266 of 500\n",
      "building tree 267 of 500\n",
      "building tree 268 of 500\n",
      "building tree 269 of 500\n",
      "building tree 270 of 500\n",
      "building tree 271 of 500\n",
      "building tree 272 of 500\n",
      "building tree 273 of 500\n",
      "building tree 274 of 500\n",
      "building tree 275 of 500\n",
      "building tree 276 of 500\n",
      "building tree 277 of 500\n",
      "building tree 278 of 500\n",
      "building tree 279 of 500\n",
      "building tree 280 of 500\n",
      "building tree 281 of 500\n",
      "building tree 282 of 500\n",
      "building tree 283 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  2.9min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 284 of 500building tree 285 of 500\n",
      "\n",
      "building tree 286 of 500\n",
      "building tree 287 of 500\n",
      "building tree 288 of 500\n",
      "building tree 289 of 500\n",
      "building tree 290 of 500\n",
      "building tree 291 of 500\n",
      "building tree 292 of 500\n",
      "building tree 293 of 500\n",
      "building tree 294 of 500\n",
      "building tree 295 of 500\n",
      "building tree 296 of 500\n",
      "building tree 297 of 500\n",
      "building tree 298 of 500\n",
      "building tree 299 of 500\n",
      "building tree 300 of 500\n",
      "building tree 301 of 500\n",
      "building tree 302 of 500\n",
      "building tree 303 of 500\n",
      "building tree 304 of 500\n",
      "building tree 305 of 500\n",
      "building tree 306 of 500\n",
      "building tree 307 of 500\n",
      "building tree 308 of 500\n",
      "building tree 309 of 500\n",
      "building tree 310 of 500\n",
      "building tree 311 of 500\n",
      "building tree 312 of 500\n",
      "building tree 313 of 500\n",
      "building tree 314 of 500\n",
      "building tree 315 of 500\n",
      "building tree 316 of 500\n",
      "building tree 317 of 500\n",
      "building tree 318 of 500\n",
      "building tree 319 of 500\n",
      "building tree 320 of 500\n",
      "building tree 321 of 500\n",
      "building tree 322 of 500\n",
      "building tree 323 of 500\n",
      "building tree 324 of 500\n",
      "building tree 325 of 500\n",
      "building tree 326 of 500\n",
      "building tree 327 of 500\n",
      "building tree 328 of 500\n",
      "building tree 329 of 500\n",
      "building tree 330 of 500\n",
      "building tree 331 of 500\n",
      "building tree 332 of 500\n",
      "building tree 333 of 500\n",
      "building tree 334 of 500\n",
      "building tree 335 of 500\n",
      "building tree 336 of 500\n",
      "building tree 337 of 500\n",
      "building tree 338 of 500\n",
      "building tree 339 of 500\n",
      "building tree 340 of 500\n",
      "building tree 341 of 500\n",
      "building tree 342 of 500\n",
      "building tree 343 of 500\n",
      "building tree 344 of 500\n",
      "building tree 345 of 500\n",
      "building tree 346 of 500\n",
      "building tree 347 of 500\n",
      "building tree 348 of 500\n",
      "building tree 349 of 500\n",
      "building tree 350 of 500\n",
      "building tree 351 of 500\n",
      "building tree 352 of 500\n",
      "building tree 353 of 500\n",
      "building tree 354 of 500\n",
      "building tree 355 of 500\n",
      "building tree 356 of 500\n",
      "building tree 357 of 500\n",
      "building tree 358 of 500\n",
      "building tree 359 of 500\n",
      "building tree 360 of 500\n",
      "building tree 361 of 500\n",
      "building tree 362 of 500\n",
      "building tree 363 of 500\n",
      "building tree 364 of 500\n",
      "building tree 365 of 500\n",
      "building tree 366 of 500\n",
      "building tree 367 of 500\n",
      "building tree 368 of 500\n",
      "building tree 369 of 500\n",
      "building tree 370 of 500\n",
      "building tree 371 of 500\n",
      "building tree 372 of 500\n",
      "building tree 373 of 500\n",
      "building tree 374 of 500\n",
      "building tree 375 of 500\n",
      "building tree 376 of 500\n",
      "building tree 377 of 500\n",
      "building tree 378 of 500\n",
      "building tree 379 of 500\n",
      "building tree 380 of 500\n",
      "building tree 381 of 500\n",
      "building tree 382 of 500\n",
      "building tree 383 of 500\n",
      "building tree 384 of 500\n",
      "building tree 385 of 500\n",
      "building tree 386 of 500\n",
      "building tree 387 of 500\n",
      "building tree 388 of 500\n",
      "building tree 389 of 500\n",
      "building tree 390 of 500\n",
      "building tree 391 of 500\n",
      "building tree 392 of 500\n",
      "building tree 393 of 500\n",
      "building tree 394 of 500\n",
      "building tree 395 of 500\n",
      "building tree 396 of 500\n",
      "building tree 397 of 500\n",
      "building tree 398 of 500\n",
      "building tree 399 of 500\n",
      "building tree 400 of 500\n",
      "building tree 401 of 500\n",
      "building tree 402 of 500\n",
      "building tree 403 of 500\n",
      "building tree 404 of 500\n",
      "building tree 405 of 500\n",
      "building tree 406 of 500\n",
      "building tree 407 of 500\n",
      "building tree 408 of 500\n",
      "building tree 409 of 500\n",
      "building tree 410 of 500\n",
      "building tree 411 of 500\n",
      "building tree 412 of 500\n",
      "building tree 413 of 500\n",
      "building tree 414 of 500\n",
      "building tree 415 of 500\n",
      "building tree 416 of 500\n",
      "building tree 417 of 500\n",
      "building tree 418 of 500\n",
      "building tree 419 of 500\n",
      "building tree 420 of 500\n",
      "building tree 421 of 500\n",
      "building tree 422 of 500\n",
      "building tree 423 of 500\n",
      "building tree 424 of 500\n",
      "building tree 425 of 500\n",
      "building tree 426 of 500\n",
      "building tree 427 of 500\n",
      "building tree 428 of 500\n",
      "building tree 429 of 500\n",
      "building tree 430 of 500\n",
      "building tree 431 of 500\n",
      "building tree 432 of 500\n",
      "building tree 433 of 500\n",
      "building tree 434 of 500\n",
      "building tree 435 of 500\n",
      "building tree 436 of 500\n",
      "building tree 437 of 500\n",
      "building tree 438 of 500\n",
      "building tree 439 of 500\n",
      "building tree 440 of 500\n",
      "building tree 441 of 500\n",
      "building tree 442 of 500\n",
      "building tree 443 of 500\n",
      "building tree 444 of 500\n",
      "building tree 445 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  4.5min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 446 of 500\n",
      "building tree 447 of 500\n",
      "building tree 448 of 500\n",
      "building tree 449 of 500\n",
      "building tree 450 of 500\n",
      "building tree 451 of 500\n",
      "building tree 452 of 500\n",
      "building tree 453 of 500\n",
      "building tree 454 of 500\n",
      "building tree 455 of 500\n",
      "building tree 456 of 500\n",
      "building tree 457 of 500\n",
      "building tree 458 of 500\n",
      "building tree 459 of 500\n",
      "building tree 460 of 500\n",
      "building tree 461 of 500\n",
      "building tree 462 of 500\n",
      "building tree 463 of 500\n",
      "building tree 464 of 500\n",
      "building tree 465 of 500\n",
      "building tree 466 of 500\n",
      "building tree 467 of 500\n",
      "building tree 468 of 500\n",
      "building tree 469 of 500\n",
      "building tree 470 of 500\n",
      "building tree 471 of 500\n",
      "building tree 472 of 500\n",
      "building tree 473 of 500\n",
      "building tree 474 of 500\n",
      "building tree 475 of 500\n",
      "building tree 476 of 500\n",
      "building tree 477 of 500\n",
      "building tree 478 of 500\n",
      "building tree 479 of 500\n",
      "building tree 480 of 500\n",
      "building tree 481 of 500\n",
      "building tree 482 of 500\n",
      "building tree 483 of 500\n",
      "building tree 484 of 500\n",
      "building tree 485 of 500\n",
      "building tree 486 of 500\n",
      "building tree 487 of 500\n",
      "building tree 488 of 500\n",
      "building tree 489 of 500\n",
      "building tree 490 of 500\n",
      "building tree 491 of 500\n",
      "building tree 492 of 500\n",
      "building tree 493 of 500\n",
      "building tree 494 of 500\n",
      "building tree 495 of 500\n",
      "building tree 496 of 500\n",
      "building tree 497 of 500\n",
      "building tree 498 of 500\n",
      "building tree 499 of 500\n",
      "building tree 500 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  5.0min finished\n",
      "[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    0.1s\n",
      "[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:    0.7s\n",
      "[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:    1.8s\n",
      "[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    3.2s\n",
      "[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    5.0s\n",
      "[Parallel(n_jobs=4)]: Done 500 out of 500 | elapsed:    5.6s finished\n",
      "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 1 of 500\n",
      "building tree 2 of 500building tree 3 of 500\n",
      "building tree 4 of 500\n",
      "\n",
      "building tree 5 of 500\n",
      "building tree 6 of 500\n",
      "building tree 7 of 500\n",
      "building tree 8 of 500\n",
      "building tree 9 of 500\n",
      "building tree 10 of 500\n",
      "building tree 11 of 500\n",
      "building tree 12 of 500\n",
      "building tree 13 of 500\n",
      "building tree 14 of 500\n",
      "building tree 15 of 500\n",
      "building tree 16 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    6.9s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 17 of 500\n",
      "building tree 18 of 500\n",
      "building tree 19 of 500\n",
      "building tree 20 of 500\n",
      "building tree 21 of 500\n",
      "building tree 22 of 500\n",
      "building tree 23 of 500\n",
      "building tree 24 of 500\n",
      "building tree 25 of 500\n",
      "building tree 26 of 500\n",
      "building tree 27 of 500\n",
      "building tree 28 of 500\n",
      "building tree 29 of 500\n",
      "building tree 30 of 500\n",
      "building tree 31 of 500\n",
      "building tree 32 of 500\n",
      "building tree 33 of 500\n",
      "building tree 34 of 500\n",
      "building tree 35 of 500\n",
      "building tree 36 of 500\n",
      "building tree 37 of 500\n",
      "building tree 38 of 500\n",
      "building tree 39 of 500\n",
      "building tree 40 of 500\n",
      "building tree 41 of 500\n",
      "building tree 42 of 500\n",
      "building tree 43 of 500\n",
      "building tree 44 of 500\n",
      "building tree 45 of 500\n",
      "building tree 46 of 500\n",
      "building tree 47 of 500\n",
      "building tree 48 of 500\n",
      "building tree 49 of 500\n",
      "building tree 50 of 500\n",
      "building tree 51 of 500\n",
      "building tree 52 of 500\n",
      "building tree 53 of 500\n",
      "building tree 54 of 500\n",
      "building tree 55 of 500\n",
      "building tree 56 of 500\n",
      "building tree 57 of 500\n",
      "building tree 58 of 500\n",
      "building tree 59 of 500\n",
      "building tree 60 of 500\n",
      "building tree 61 of 500\n",
      "building tree 62 of 500\n",
      "building tree 63 of 500\n",
      "building tree 64 of 500\n",
      "building tree 65 of 500\n",
      "building tree 66 of 500\n",
      "building tree 67 of 500\n",
      "building tree 68 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   36.9s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 69 of 500\n",
      "building tree 70 of 500\n",
      "building tree 71 of 500\n",
      "building tree 72 of 500\n",
      "building tree 73 of 500\n",
      "building tree 74 of 500\n",
      "building tree 75 of 500\n",
      "building tree 76 of 500\n",
      "building tree 77 of 500\n",
      "building tree 78 of 500\n",
      "building tree 79 of 500\n",
      "building tree 80 of 500\n",
      "building tree 81 of 500\n",
      "building tree 82 of 500\n",
      "building tree 83 of 500\n",
      "building tree 84 of 500\n",
      "building tree 85 of 500\n",
      "building tree 86 of 500\n",
      "building tree 87 of 500\n",
      "building tree 88 of 500\n",
      "building tree 89 of 500\n",
      "building tree 90 of 500\n",
      "building tree 91 of 500\n",
      "building tree 92 of 500\n",
      "building tree 93 of 500\n",
      "building tree 94 of 500\n",
      "building tree 95 of 500\n",
      "building tree 96 of 500\n",
      "building tree 97 of 500\n",
      "building tree 98 of 500\n",
      "building tree 99 of 500\n",
      "building tree 100 of 500\n",
      "building tree 101 of 500\n",
      "building tree 102 of 500\n",
      "building tree 103 of 500\n",
      "building tree 104 of 500\n",
      "building tree 105 of 500\n",
      "building tree 106 of 500\n",
      "building tree 107 of 500\n",
      "building tree 108 of 500\n",
      "building tree 109 of 500\n",
      "building tree 110 of 500\n",
      "building tree 111 of 500\n",
      "building tree 112 of 500\n",
      "building tree 113 of 500\n",
      "building tree 114 of 500\n",
      "building tree 115 of 500\n",
      "building tree 116 of 500\n",
      "building tree 117 of 500\n",
      "building tree 118 of 500\n",
      "building tree 119 of 500\n",
      "building tree 120 of 500\n",
      "building tree 121 of 500\n",
      "building tree 122 of 500\n",
      "building tree 123 of 500\n",
      "building tree 124 of 500\n",
      "building tree 125 of 500\n",
      "building tree 126 of 500\n",
      "building tree 127 of 500\n",
      "building tree 128 of 500\n",
      "building tree 129 of 500\n",
      "building tree 130 of 500\n",
      "building tree 131 of 500\n",
      "building tree 132 of 500\n",
      "building tree 133 of 500\n",
      "building tree 134 of 500\n",
      "building tree 135 of 500\n",
      "building tree 136 of 500\n",
      "building tree 137 of 500\n",
      "building tree 138 of 500\n",
      "building tree 139 of 500\n",
      "building tree 140 of 500\n",
      "building tree 141 of 500\n",
      "building tree 142 of 500\n",
      "building tree 143 of 500\n",
      "building tree 144 of 500\n",
      "building tree 145 of 500\n",
      "building tree 146 of 500\n",
      "building tree 147 of 500\n",
      "building tree 148 of 500\n",
      "building tree 149 of 500\n",
      "building tree 150 of 500\n",
      "building tree 151 of 500\n",
      "building tree 152 of 500\n",
      "building tree 153 of 500\n",
      "building tree 154 of 500\n",
      "building tree 155 of 500\n",
      "building tree 156 of 500\n",
      "building tree 157 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  1.5min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 158 of 500\n",
      "building tree 159 of 500\n",
      "building tree 160 of 500\n",
      "building tree 161 of 500\n",
      "building tree 162 of 500\n",
      "building tree 163 of 500\n",
      "building tree 164 of 500\n",
      "building tree 165 of 500\n",
      "building tree 166 of 500\n",
      "building tree 167 of 500\n",
      "building tree 168 of 500\n",
      "building tree 169 of 500\n",
      "building tree 170 of 500\n",
      "building tree 171 of 500\n",
      "building tree 172 of 500\n",
      "building tree 173 of 500\n",
      "building tree 174 of 500\n",
      "building tree 175 of 500\n",
      "building tree 176 of 500\n",
      "building tree 177 of 500\n",
      "building tree 178 of 500\n",
      "building tree 179 of 500\n",
      "building tree 180 of 500\n",
      "building tree 181 of 500\n",
      "building tree 182 of 500\n",
      "building tree 183 of 500\n",
      "building tree 184 of 500\n",
      "building tree 185 of 500\n",
      "building tree 186 of 500\n",
      "building tree 187 of 500\n",
      "building tree 188 of 500\n",
      "building tree 189 of 500\n",
      "building tree 190 of 500\n",
      "building tree 191 of 500\n",
      "building tree 192 of 500\n",
      "building tree 193 of 500\n",
      "building tree 194 of 500\n",
      "building tree 195 of 500\n",
      "building tree 196 of 500\n",
      "building tree 197 of 500\n",
      "building tree 198 of 500\n",
      "building tree 199 of 500\n",
      "building tree 200 of 500\n",
      "building tree 201 of 500\n",
      "building tree 202 of 500\n",
      "building tree 203 of 500\n",
      "building tree 204 of 500\n",
      "building tree 205 of 500\n",
      "building tree 206 of 500\n",
      "building tree 207 of 500\n",
      "building tree 208 of 500\n",
      "building tree 209 of 500\n",
      "building tree 210 of 500\n",
      "building tree 211 of 500\n",
      "building tree 212 of 500\n",
      "building tree 213 of 500\n",
      "building tree 214 of 500\n",
      "building tree 215 of 500\n",
      "building tree 216 of 500\n",
      "building tree 217 of 500\n",
      "building tree 218 of 500\n",
      "building tree 219 of 500\n",
      "building tree 220 of 500\n",
      "building tree 221 of 500\n",
      "building tree 222 of 500\n",
      "building tree 223 of 500\n",
      "building tree 224 of 500\n",
      "building tree 225 of 500\n",
      "building tree 226 of 500\n",
      "building tree 227 of 500\n",
      "building tree 228 of 500\n",
      "building tree 229 of 500\n",
      "building tree 230 of 500\n",
      "building tree 231 of 500\n",
      "building tree 232 of 500\n",
      "building tree 233 of 500\n",
      "building tree 234 of 500\n",
      "building tree 235 of 500\n",
      "building tree 236 of 500\n",
      "building tree 237 of 500\n",
      "building tree 238 of 500\n",
      "building tree 239 of 500\n",
      "building tree 240 of 500\n",
      "building tree 241 of 500\n",
      "building tree 242 of 500\n",
      "building tree 243 of 500\n",
      "building tree 244 of 500\n",
      "building tree 245 of 500\n",
      "building tree 246 of 500\n",
      "building tree 247 of 500\n",
      "building tree 248 of 500\n",
      "building tree 249 of 500\n",
      "building tree 250 of 500\n",
      "building tree 251 of 500\n",
      "building tree 252 of 500\n",
      "building tree 253 of 500\n",
      "building tree 254 of 500\n",
      "building tree 255 of 500\n",
      "building tree 256 of 500\n",
      "building tree 257 of 500\n",
      "building tree 258 of 500\n",
      "building tree 259 of 500\n",
      "building tree 260 of 500\n",
      "building tree 261 of 500\n",
      "building tree 262 of 500\n",
      "building tree 263 of 500\n",
      "building tree 264 of 500\n",
      "building tree 265 of 500\n",
      "building tree 266 of 500\n",
      "building tree 267 of 500\n",
      "building tree 268 of 500\n",
      "building tree 269 of 500\n",
      "building tree 270 of 500\n",
      "building tree 271 of 500\n",
      "building tree 272 of 500\n",
      "building tree 273 of 500\n",
      "building tree 274 of 500\n",
      "building tree 275 of 500\n",
      "building tree 276 of 500\n",
      "building tree 277 of 500\n",
      "building tree 278 of 500\n",
      "building tree 279 of 500\n",
      "building tree 280 of 500\n",
      "building tree 281 of 500\n",
      "building tree 282 of 500\n",
      "building tree 283 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  2.7min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 284 of 500\n",
      "building tree 285 of 500\n",
      "building tree 286 of 500\n",
      "building tree 287 of 500\n",
      "building tree 288 of 500\n",
      "building tree 289 of 500\n",
      "building tree 290 of 500\n",
      "building tree 291 of 500\n",
      "building tree 292 of 500\n",
      "building tree 293 of 500\n",
      "building tree 294 of 500\n",
      "building tree 295 of 500\n",
      "building tree 296 of 500\n",
      "building tree 297 of 500\n",
      "building tree 298 of 500\n",
      "building tree 299 of 500\n",
      "building tree 300 of 500\n",
      "building tree 301 of 500\n",
      "building tree 302 of 500\n",
      "building tree 303 of 500\n",
      "building tree 304 of 500\n",
      "building tree 305 of 500\n",
      "building tree 306 of 500\n",
      "building tree 307 of 500\n",
      "building tree 308 of 500\n",
      "building tree 309 of 500\n",
      "building tree 310 of 500\n",
      "building tree 311 of 500\n",
      "building tree 312 of 500\n",
      "building tree 313 of 500\n",
      "building tree 314 of 500\n",
      "building tree 315 of 500\n",
      "building tree 316 of 500\n",
      "building tree 317 of 500\n",
      "building tree 318 of 500\n",
      "building tree 319 of 500\n",
      "building tree 320 of 500\n",
      "building tree 321 of 500\n",
      "building tree 322 of 500\n",
      "building tree 323 of 500\n",
      "building tree 324 of 500\n",
      "building tree 325 of 500\n",
      "building tree 326 of 500\n",
      "building tree 327 of 500\n",
      "building tree 328 of 500\n",
      "building tree 329 of 500\n",
      "building tree 330 of 500\n",
      "building tree 331 of 500\n",
      "building tree 332 of 500\n",
      "building tree 333 of 500\n",
      "building tree 334 of 500\n",
      "building tree 335 of 500\n",
      "building tree 336 of 500\n",
      "building tree 337 of 500\n",
      "building tree 338 of 500\n",
      "building tree 339 of 500\n",
      "building tree 340 of 500\n",
      "building tree 341 of 500\n",
      "building tree 342 of 500\n",
      "building tree 343 of 500\n",
      "building tree 344 of 500\n",
      "building tree 345 of 500\n",
      "building tree 346 of 500\n",
      "building tree 347 of 500\n",
      "building tree 348 of 500\n",
      "building tree 349 of 500\n",
      "building tree 350 of 500\n",
      "building tree 351 of 500\n",
      "building tree 352 of 500\n",
      "building tree 353 of 500\n",
      "building tree 354 of 500\n",
      "building tree 355 of 500\n",
      "building tree 356 of 500\n",
      "building tree 357 of 500\n",
      "building tree 358 of 500\n",
      "building tree 359 of 500\n",
      "building tree 360 of 500\n",
      "building tree 361 of 500\n",
      "building tree 362 of 500\n",
      "building tree 363 of 500\n",
      "building tree 364 of 500\n",
      "building tree 365 of 500\n",
      "building tree 366 of 500\n",
      "building tree 367 of 500\n",
      "building tree 368 of 500\n",
      "building tree 369 of 500\n",
      "building tree 370 of 500\n",
      "building tree 371 of 500\n",
      "building tree 372 of 500\n",
      "building tree 373 of 500\n",
      "building tree 374 of 500\n",
      "building tree 375 of 500\n",
      "building tree 376 of 500\n",
      "building tree 377 of 500\n",
      "building tree 378 of 500\n",
      "building tree 379 of 500\n",
      "building tree 380 of 500\n",
      "building tree 381 of 500\n",
      "building tree 382 of 500\n",
      "building tree 383 of 500\n",
      "building tree 384 of 500\n",
      "building tree 385 of 500\n",
      "building tree 386 of 500\n",
      "building tree 387 of 500\n",
      "building tree 388 of 500\n",
      "building tree 389 of 500\n",
      "building tree 390 of 500\n",
      "building tree 391 of 500\n",
      "building tree 392 of 500\n",
      "building tree 393 of 500\n",
      "building tree 394 of 500\n",
      "building tree 395 of 500\n",
      "building tree 396 of 500\n",
      "building tree 397 of 500\n",
      "building tree 398 of 500\n",
      "building tree 399 of 500\n",
      "building tree 400 of 500\n",
      "building tree 401 of 500\n",
      "building tree 402 of 500\n",
      "building tree 403 of 500\n",
      "building tree 404 of 500\n",
      "building tree 405 of 500\n",
      "building tree 406 of 500\n",
      "building tree 407 of 500\n",
      "building tree 408 of 500\n",
      "building tree 409 of 500\n",
      "building tree 410 of 500\n",
      "building tree 411 of 500\n",
      "building tree 412 of 500\n",
      "building tree 413 of 500\n",
      "building tree 414 of 500\n",
      "building tree 415 of 500\n",
      "building tree 416 of 500\n",
      "building tree 417 of 500\n",
      "building tree 418 of 500\n",
      "building tree 419 of 500\n",
      "building tree 420 of 500\n",
      "building tree 421 of 500\n",
      "building tree 422 of 500\n",
      "building tree 423 of 500\n",
      "building tree 424 of 500\n",
      "building tree 425 of 500\n",
      "building tree 426 of 500\n",
      "building tree 427 of 500\n",
      "building tree 428 of 500\n",
      "building tree 429 of 500\n",
      "building tree 430 of 500\n",
      "building tree 431 of 500\n",
      "building tree 432 of 500\n",
      "building tree 433 of 500\n",
      "building tree 434 of 500\n",
      "building tree 435 of 500\n",
      "building tree 436 of 500\n",
      "building tree 437 of 500\n",
      "building tree 438 of 500\n",
      "building tree 439 of 500\n",
      "building tree 440 of 500\n",
      "building tree 441 of 500\n",
      "building tree 442 of 500\n",
      "building tree 443 of 500\n",
      "building tree 444 of 500\n",
      "building tree 445 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  4.2min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 446 of 500\n",
      "building tree 447 of 500\n",
      "building tree 448 of 500\n",
      "building tree 449 of 500\n",
      "building tree 450 of 500\n",
      "building tree 451 of 500\n",
      "building tree 452 of 500\n",
      "building tree 453 of 500\n",
      "building tree 454 of 500\n",
      "building tree 455 of 500\n",
      "building tree 456 of 500\n",
      "building tree 457 of 500\n",
      "building tree 458 of 500\n",
      "building tree 459 of 500\n",
      "building tree 460 of 500\n",
      "building tree 461 of 500\n",
      "building tree 462 of 500\n",
      "building tree 463 of 500\n",
      "building tree 464 of 500\n",
      "building tree 465 of 500\n",
      "building tree 466 of 500\n",
      "building tree 467 of 500\n",
      "building tree 468 of 500\n",
      "building tree 469 of 500\n",
      "building tree 470 of 500\n",
      "building tree 471 of 500\n",
      "building tree 472 of 500\n",
      "building tree 473 of 500\n",
      "building tree 474 of 500\n",
      "building tree 475 of 500\n",
      "building tree 476 of 500\n",
      "building tree 477 of 500\n",
      "building tree 478 of 500\n",
      "building tree 479 of 500\n",
      "building tree 480 of 500\n",
      "building tree 481 of 500\n",
      "building tree 482 of 500\n",
      "building tree 483 of 500\n",
      "building tree 484 of 500\n",
      "building tree 485 of 500\n",
      "building tree 486 of 500\n",
      "building tree 487 of 500\n",
      "building tree 488 of 500\n",
      "building tree 489 of 500\n",
      "building tree 490 of 500\n",
      "building tree 491 of 500\n",
      "building tree 492 of 500\n",
      "building tree 493 of 500\n",
      "building tree 494 of 500\n",
      "building tree 495 of 500\n",
      "building tree 496 of 500\n",
      "building tree 497 of 500\n",
      "building tree 498 of 500\n",
      "building tree 499 of 500\n",
      "building tree 500 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  4.8min finished\n",
      "[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    0.1s\n",
      "[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:    0.7s\n",
      "[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:    1.7s\n",
      "[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    3.1s\n",
      "[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    4.9s\n",
      "[Parallel(n_jobs=4)]: Done 500 out of 500 | elapsed:    5.6s finished\n",
      "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "convergence after 2047 epochs took 1729 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 28.8min finished\n",
      "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 1 of 500\n",
      "building tree 2 of 500building tree 3 of 500\n",
      "\n",
      "building tree 4 of 500\n",
      "building tree 5 of 500\n",
      "building tree 6 of 500\n",
      "building tree 7 of 500\n",
      "building tree 8 of 500\n",
      "building tree 9 of 500\n",
      "building tree 10 of 500\n",
      "building tree 11 of 500\n",
      "building tree 12 of 500\n",
      "building tree 13 of 500\n",
      "building tree 14 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   15.4s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 15 of 500\n",
      "building tree 16 of 500\n",
      "building tree 17 of 500\n",
      "building tree 18 of 500\n",
      "building tree 19 of 500\n",
      "building tree 20 of 500\n",
      "building tree 21 of 500\n",
      "building tree 22 of 500\n",
      "building tree 23 of 500\n",
      "building tree 24 of 500\n",
      "building tree 25 of 500\n",
      "building tree 26 of 500\n",
      "building tree 27 of 500\n",
      "building tree 28 of 500\n",
      "building tree 29 of 500\n",
      "building tree 30 of 500\n",
      "building tree 31 of 500\n",
      "building tree 32 of 500\n",
      "building tree 33 of 500\n",
      "building tree 34 of 500\n",
      "building tree 35 of 500\n",
      "building tree 36 of 500\n",
      "building tree 37 of 500\n",
      "building tree 38 of 500\n",
      "building tree 39 of 500\n",
      "building tree 40 of 500\n",
      "building tree 41 of 500\n",
      "building tree 42 of 500\n",
      "building tree 43 of 500\n",
      "building tree 44 of 500\n",
      "building tree 45 of 500\n",
      "building tree 46 of 500\n",
      "building tree 47 of 500\n",
      "building tree 48 of 500\n",
      "building tree 49 of 500\n",
      "building tree 50 of 500\n",
      "building tree 51 of 500\n",
      "building tree 52 of 500\n",
      "building tree 53 of 500\n",
      "building tree 54 of 500\n",
      "building tree 55 of 500\n",
      "building tree 56 of 500\n",
      "building tree 57 of 500\n",
      "building tree 58 of 500\n",
      "building tree 59 of 500\n",
      "building tree 60 of 500\n",
      "building tree 61 of 500\n",
      "building tree 62 of 500\n",
      "building tree 63 of 500\n",
      "building tree 64 of 500\n",
      "building tree 65 of 500\n",
      "building tree 66 of 500\n",
      "building tree 67 of 500\n",
      "building tree 68 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:  1.4min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 69 of 500\n",
      "building tree 70 of 500\n",
      "building tree 71 of 500\n",
      "building tree 72 of 500\n",
      "building tree 73 of 500\n",
      "building tree 74 of 500\n",
      "building tree 75 of 500\n",
      "building tree 76 of 500\n",
      "building tree 77 of 500\n",
      "building tree 78 of 500\n",
      "building tree 79 of 500\n",
      "building tree 80 of 500\n",
      "building tree 81 of 500\n",
      "building tree 82 of 500\n",
      "building tree 83 of 500\n",
      "building tree 84 of 500\n",
      "building tree 85 of 500\n",
      "building tree 86 of 500\n",
      "building tree 87 of 500\n",
      "building tree 88 of 500\n",
      "building tree 89 of 500\n",
      "building tree 90 of 500\n",
      "building tree 91 of 500\n",
      "building tree 92 of 500\n",
      "building tree 93 of 500\n",
      "building tree 94 of 500\n",
      "building tree 95 of 500\n",
      "building tree 96 of 500\n",
      "building tree 97 of 500\n",
      "building tree 98 of 500\n",
      "building tree 99 of 500\n",
      "building tree 100 of 500\n",
      "building tree 101 of 500\n",
      "building tree 102 of 500\n",
      "building tree 103 of 500\n",
      "building tree 104 of 500\n",
      "building tree 105 of 500\n",
      "building tree 106 of 500\n",
      "building tree 107 of 500\n",
      "building tree 108 of 500\n",
      "building tree 109 of 500\n",
      "building tree 110 of 500\n",
      "building tree 111 of 500\n",
      "building tree 112 of 500\n",
      "building tree 113 of 500\n",
      "building tree 114 of 500\n",
      "building tree 115 of 500\n",
      "building tree 116 of 500\n",
      "building tree 117 of 500\n",
      "building tree 118 of 500\n",
      "building tree 119 of 500\n",
      "building tree 120 of 500\n",
      "building tree 121 of 500\n",
      "building tree 122 of 500\n",
      "building tree 123 of 500\n",
      "building tree 124 of 500\n",
      "building tree 125 of 500\n",
      "building tree 126 of 500\n",
      "building tree 127 of 500\n",
      "building tree 128 of 500\n",
      "building tree 129 of 500\n",
      "building tree 130 of 500\n",
      "building tree 131 of 500\n",
      "building tree 132 of 500\n",
      "building tree 133 of 500\n",
      "building tree 134 of 500\n",
      "building tree 135 of 500\n",
      "building tree 136 of 500\n",
      "building tree 137 of 500\n",
      "building tree 138 of 500\n",
      "building tree 139 of 500\n",
      "building tree 140 of 500\n",
      "building tree 141 of 500\n",
      "building tree 142 of 500\n",
      "building tree 143 of 500\n",
      "building tree 144 of 500\n",
      "building tree 145 of 500\n",
      "building tree 146 of 500\n",
      "building tree 147 of 500\n",
      "building tree 148 of 500\n",
      "building tree 149 of 500\n",
      "building tree 150 of 500\n",
      "building tree 151 of 500\n",
      "building tree 152 of 500\n",
      "building tree 153 of 500\n",
      "building tree 154 of 500\n",
      "building tree 155 of 500\n",
      "building tree 156 of 500\n",
      "building tree 157 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  3.6min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 158 of 500\n",
      "building tree 159 of 500\n",
      "building tree 160 of 500\n",
      "building tree 161 of 500\n",
      "building tree 162 of 500\n",
      "building tree 163 of 500\n",
      "building tree 164 of 500\n",
      "building tree 165 of 500\n",
      "building tree 166 of 500\n",
      "building tree 167 of 500\n",
      "building tree 168 of 500\n",
      "building tree 169 of 500\n",
      "building tree 170 of 500\n",
      "building tree 171 of 500\n",
      "building tree 172 of 500\n",
      "building tree 173 of 500\n",
      "building tree 174 of 500\n",
      "building tree 175 of 500\n",
      "building tree 176 of 500\n",
      "building tree 177 of 500\n",
      "building tree 178 of 500\n",
      "building tree 179 of 500\n",
      "building tree 180 of 500\n",
      "building tree 181 of 500\n",
      "building tree 182 of 500\n",
      "building tree 183 of 500\n",
      "building tree 184 of 500\n",
      "building tree 185 of 500\n",
      "building tree 186 of 500\n",
      "building tree 187 of 500\n",
      "building tree 188 of 500\n",
      "building tree 189 of 500\n",
      "building tree 190 of 500\n",
      "building tree 191 of 500\n",
      "building tree 192 of 500\n",
      "building tree 193 of 500\n",
      "building tree 194 of 500\n",
      "building tree 195 of 500\n",
      "building tree 196 of 500\n",
      "building tree 197 of 500\n",
      "building tree 198 of 500\n",
      "building tree 199 of 500\n",
      "building tree 200 of 500\n",
      "building tree 201 of 500\n",
      "building tree 202 of 500\n",
      "building tree 203 of 500\n",
      "building tree 204 of 500\n",
      "building tree 205 of 500\n",
      "building tree 206 of 500\n",
      "building tree 207 of 500\n",
      "building tree 208 of 500\n",
      "building tree 209 of 500\n",
      "building tree 210 of 500\n",
      "building tree 211 of 500\n",
      "building tree 212 of 500\n",
      "building tree 213 of 500\n",
      "building tree 214 of 500\n",
      "building tree 215 of 500\n",
      "building tree 216 of 500\n",
      "building tree 217 of 500\n",
      "building tree 218 of 500\n",
      "building tree 219 of 500\n",
      "building tree 220 of 500\n",
      "building tree 221 of 500\n",
      "building tree 222 of 500\n",
      "building tree 223 of 500\n",
      "building tree 224 of 500\n",
      "building tree 225 of 500\n",
      "building tree 226 of 500\n",
      "building tree 227 of 500\n",
      "building tree 228 of 500\n",
      "building tree 229 of 500\n",
      "building tree 230 of 500\n",
      "building tree 231 of 500\n",
      "building tree 232 of 500\n",
      "building tree 233 of 500\n",
      "building tree 234 of 500\n",
      "building tree 235 of 500\n",
      "building tree 236 of 500\n",
      "building tree 237 of 500building tree 238 of 500\n",
      "\n",
      "building tree 239 of 500\n",
      "building tree 240 of 500\n",
      "building tree 241 of 500\n",
      "building tree 242 of 500\n",
      "building tree 243 of 500\n",
      "building tree 244 of 500\n",
      "building tree 245 of 500\n",
      "building tree 246 of 500\n",
      "building tree 247 of 500\n",
      "building tree 248 of 500\n",
      "building tree 249 of 500\n",
      "building tree 250 of 500\n",
      "building tree 251 of 500\n",
      "building tree 252 of 500\n",
      "building tree 253 of 500\n",
      "building tree 254 of 500\n",
      "building tree 255 of 500\n",
      "building tree 256 of 500\n",
      "building tree 257 of 500\n",
      "building tree 258 of 500\n",
      "building tree 259 of 500\n",
      "building tree 260 of 500\n",
      "building tree 261 of 500\n",
      "building tree 262 of 500\n",
      "building tree 263 of 500\n",
      "building tree 264 of 500\n",
      "building tree 265 of 500\n",
      "building tree 266 of 500\n",
      "building tree 267 of 500\n",
      "building tree 268 of 500\n",
      "building tree 269 of 500\n",
      "building tree 270 of 500\n",
      "building tree 271 of 500\n",
      "building tree 272 of 500\n",
      "building tree 273 of 500\n",
      "building tree 274 of 500\n",
      "building tree 275 of 500\n",
      "building tree 276 of 500\n",
      "building tree 277 of 500\n",
      "building tree 278 of 500\n",
      "building tree 279 of 500\n",
      "building tree 280 of 500\n",
      "building tree 281 of 500\n",
      "building tree 282 of 500\n",
      "building tree 283 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  6.3min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 284 of 500\n",
      "building tree 285 of 500\n",
      "building tree 286 of 500\n",
      "building tree 287 of 500\n",
      "building tree 288 of 500\n",
      "building tree 289 of 500\n",
      "building tree 290 of 500\n",
      "building tree 291 of 500\n",
      "building tree 292 of 500\n",
      "building tree 293 of 500\n",
      "building tree 294 of 500\n",
      "building tree 295 of 500\n",
      "building tree 296 of 500\n",
      "building tree 297 of 500\n",
      "building tree 298 of 500\n",
      "building tree 299 of 500\n",
      "building tree 300 of 500\n",
      "building tree 301 of 500\n",
      "building tree 302 of 500\n",
      "building tree 303 of 500\n",
      "building tree 304 of 500\n",
      "building tree 305 of 500\n",
      "building tree 306 of 500\n",
      "building tree 307 of 500\n",
      "building tree 308 of 500\n",
      "building tree 309 of 500\n",
      "building tree 310 of 500\n",
      "building tree 311 of 500\n",
      "building tree 312 of 500\n",
      "building tree 313 of 500\n",
      "building tree 314 of 500\n",
      "building tree 315 of 500\n",
      "building tree 316 of 500\n",
      "building tree 317 of 500\n",
      "building tree 318 of 500\n",
      "building tree 319 of 500\n",
      "building tree 320 of 500\n",
      "building tree 321 of 500\n",
      "building tree 322 of 500\n",
      "building tree 323 of 500\n",
      "building tree 324 of 500\n",
      "building tree 325 of 500\n",
      "building tree 326 of 500\n",
      "building tree 327 of 500\n",
      "building tree 328 of 500\n",
      "building tree 329 of 500\n",
      "building tree 330 of 500\n",
      "building tree 331 of 500\n",
      "building tree 332 of 500\n",
      "building tree 333 of 500\n",
      "building tree 334 of 500\n",
      "building tree 335 of 500\n",
      "building tree 336 of 500\n",
      "building tree 337 of 500\n",
      "building tree 338 of 500\n",
      "building tree 339 of 500\n",
      "building tree 340 of 500\n",
      "building tree 341 of 500\n",
      "building tree 342 of 500\n",
      "building tree 343 of 500\n",
      "building tree 344 of 500\n",
      "building tree 345 of 500\n",
      "building tree 346 of 500\n",
      "building tree 347 of 500\n",
      "building tree 348 of 500\n",
      "building tree 349 of 500\n",
      "building tree 350 of 500\n",
      "building tree 351 of 500\n",
      "building tree 352 of 500\n",
      "building tree 353 of 500\n",
      "building tree 354 of 500\n",
      "building tree 355 of 500\n",
      "building tree 356 of 500\n",
      "building tree 357 of 500\n",
      "building tree 358 of 500\n",
      "building tree 359 of 500\n",
      "building tree 360 of 500\n",
      "building tree 361 of 500\n",
      "building tree 362 of 500\n",
      "building tree 363 of 500\n",
      "building tree 364 of 500\n",
      "building tree 365 of 500\n",
      "building tree 366 of 500\n",
      "building tree 367 of 500\n",
      "building tree 368 of 500\n",
      "building tree 369 of 500\n",
      "building tree 370 of 500\n",
      "building tree 371 of 500\n",
      "building tree 372 of 500\n",
      "building tree 373 of 500\n",
      "building tree 374 of 500\n",
      "building tree 375 of 500\n",
      "building tree 376 of 500\n",
      "building tree 377 of 500\n",
      "building tree 378 of 500\n",
      "building tree 379 of 500\n",
      "building tree 380 of 500\n",
      "building tree 381 of 500\n",
      "building tree 382 of 500\n",
      "building tree 383 of 500\n",
      "building tree 384 of 500\n",
      "building tree 385 of 500\n",
      "building tree 386 of 500\n",
      "building tree 387 of 500\n",
      "building tree 388 of 500\n",
      "building tree 389 of 500\n",
      "building tree 390 of 500\n",
      "building tree 391 of 500\n",
      "building tree 392 of 500\n",
      "building tree 393 of 500\n",
      "building tree 394 of 500\n",
      "building tree 395 of 500\n",
      "building tree 396 of 500\n",
      "building tree 397 of 500\n",
      "building tree 398 of 500\n",
      "building tree 399 of 500\n",
      "building tree 400 of 500\n",
      "building tree 401 of 500\n",
      "building tree 402 of 500\n",
      "building tree 403 of 500\n",
      "building tree 404 of 500\n",
      "building tree 405 of 500\n",
      "building tree 406 of 500\n",
      "building tree 407 of 500\n",
      "building tree 408 of 500\n",
      "building tree 409 of 500\n",
      "building tree 410 of 500\n",
      "building tree 411 of 500\n",
      "building tree 412 of 500\n",
      "building tree 413 of 500\n",
      "building tree 414 of 500\n",
      "building tree 415 of 500\n",
      "building tree 416 of 500\n",
      "building tree 417 of 500\n",
      "building tree 418 of 500\n",
      "building tree 419 of 500\n",
      "building tree 420 of 500\n",
      "building tree 421 of 500\n",
      "building tree 422 of 500\n",
      "building tree 423 of 500\n",
      "building tree 424 of 500\n",
      "building tree 425 of 500\n",
      "building tree 426 of 500\n",
      "building tree 427 of 500\n",
      "building tree 428 of 500\n",
      "building tree 429 of 500\n",
      "building tree 430 of 500\n",
      "building tree 431 of 500\n",
      "building tree 432 of 500\n",
      "building tree 433 of 500\n",
      "building tree 434 of 500\n",
      "building tree 435 of 500\n",
      "building tree 436 of 500\n",
      "building tree 437 of 500\n",
      "building tree 438 of 500\n",
      "building tree 439 of 500\n",
      "building tree 440 of 500\n",
      "building tree 441 of 500\n",
      "building tree 442 of 500\n",
      "building tree 443 of 500\n",
      "building tree 444 of 500\n",
      "building tree 445 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed: 10.1min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 446 of 500\n",
      "building tree 447 of 500\n",
      "building tree 448 of 500\n",
      "building tree 449 of 500\n",
      "building tree 450 of 500\n",
      "building tree 451 of 500\n",
      "building tree 452 of 500\n",
      "building tree 453 of 500\n",
      "building tree 454 of 500\n",
      "building tree 455 of 500\n",
      "building tree 456 of 500\n",
      "building tree 457 of 500\n",
      "building tree 458 of 500\n",
      "building tree 459 of 500\n",
      "building tree 460 of 500\n",
      "building tree 461 of 500\n",
      "building tree 462 of 500\n",
      "building tree 463 of 500\n",
      "building tree 464 of 500\n",
      "building tree 465 of 500\n",
      "building tree 466 of 500\n",
      "building tree 467 of 500\n",
      "building tree 468 of 500\n",
      "building tree 469 of 500\n",
      "building tree 470 of 500\n",
      "building tree 471 of 500\n",
      "building tree 472 of 500\n",
      "building tree 473 of 500\n",
      "building tree 474 of 500\n",
      "building tree 475 of 500\n",
      "building tree 476 of 500\n",
      "building tree 477 of 500\n",
      "building tree 478 of 500\n",
      "building tree 479 of 500\n",
      "building tree 480 of 500\n",
      "building tree 481 of 500\n",
      "building tree 482 of 500\n",
      "building tree 483 of 500\n",
      "building tree 484 of 500\n",
      "building tree 485 of 500\n",
      "building tree 486 of 500\n",
      "building tree 487 of 500\n",
      "building tree 488 of 500\n",
      "building tree 489 of 500\n",
      "building tree 490 of 500\n",
      "building tree 491 of 500\n",
      "building tree 492 of 500\n",
      "building tree 493 of 500\n",
      "building tree 494 of 500\n",
      "building tree 495 of 500\n",
      "building tree 496 of 500\n",
      "building tree 497 of 500\n",
      "building tree 498 of 500\n",
      "building tree 499 of 500\n",
      "building tree 500 of 500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 11.4min finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 9h 29min 13s, sys: 3min 34s, total: 9h 32min 48s\n",
      "Wall time: 6h 37min 2s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "StackingCVClassifier(classifiers=[LogisticRegression(C=1.2, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=10000, multi_class='warn',\n",
       "          n_jobs=-1, penalty='l2', random_state=42, solver='sag',\n",
       "          tol=0.0001, verbose=1, warm_start=False), KNeighborsClassifier(algo...alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
       "       silent=True, subsample=0.8, verbose=2)],\n",
       "           cv=2,\n",
       "           meta_classifier=LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=-1,\n",
       "        metric='binary_logloss', min_child_samples=20,\n",
       "        min_child_weight=0.001, min_split_gain=0.0, n_estimators=5000,\n",
       "        n_jobs=-1, num_leaves=31, objective='binary', random_state=42,\n",
       "        reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,\n",
       "        subsample_for_bin=200000, subsample_freq=0, verbose=1),\n",
       "           shuffle=True, store_train_meta_features=False, stratify=True,\n",
       "           use_clones=True, use_features_in_secondary=False,\n",
       "           use_probas=True, verbose=0)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# Fit the stacking ensemble (base classifiers plus the LightGBM meta-classifier) on X_train / y_train.\n",
    "# Long-running: the saved output of this cell reports a wall time of about 6h 37min.\n",
    "sclf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    2.6s\n",
      "[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   11.9s\n",
      "[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:   27.5s\n",
      "[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:   49.2s\n",
      "[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.2min\n",
      "[Parallel(n_jobs=4)]: Done 500 out of 500 | elapsed:  1.3min finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7h 12min 2s, sys: 5min, total: 7h 17min 3s\n",
      "Wall time: 2h 33min 23s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Keep only column 1 of predict_proba for every row of X_test\n",
    "# (presumably the positive-class probability - TODO confirm against sclf.classes_).\n",
    "# Slow: the saved output of this cell reports a wall time of about 2h 33min.\n",
    "y_test_stacking_diverse = sclf.predict_proba(X_test)[:,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[LogisticRegression(C=1.2, class_weight=None, dual=False, fit_intercept=True,\n",
       "           intercept_scaling=1, max_iter=10000, multi_class='warn',\n",
       "           n_jobs=-1, penalty='l2', random_state=42, solver='sag',\n",
       "           tol=0.0001, verbose=1, warm_start=False),\n",
       " KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "            metric_params=None, n_jobs=-1, n_neighbors=10, p=2,\n",
       "            weights='uniform'),\n",
       " GaussianNB(priors=None, var_smoothing=1e-09),\n",
       " RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "             max_depth=10, max_features='auto', max_leaf_nodes=None,\n",
       "             min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "             min_samples_leaf=20, min_samples_split=10,\n",
       "             min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,\n",
       "             oob_score=False, random_state=42, verbose=5, warm_start=False),\n",
       " AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n",
       "           learning_rate=0.1, n_estimators=500, random_state=42),\n",
       " LGBMClassifier(boosting_type='gbdt', class_weight=None,\n",
       "         colsample_bytree=0.486, importance_type='split',\n",
       "         learning_rate=0.01, max_depth=-1, metric='binary_logloss',\n",
       "         min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "         n_estimators=3600, n_jobs=-1, num_leaves=158, objective='binary',\n",
       "         random_state=42, reg_alpha=0.0, reg_lambda=50, silent=True,\n",
       "         subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "         verbose=1),\n",
       " XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "        colsample_bytree=0.7, eval_metric='logloss', gamma=0.005,\n",
       "        learning_rate=0.05, max_delta_step=1, max_depth=8,\n",
       "        min_child_weight=1, missing=None, n_estimators=640, n_jobs=-1,\n",
       "        nthread=None, objective='binary:logistic', random_state=42,\n",
       "        reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
       "        silent=True, subsample=0.8, verbose=2)]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the list of base classifiers configured in the stacking ensemble.\n",
    "sclf.classifiers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Stacking all classifiers - Method Two: LightGBM models trained with different seeds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seeds for the multi-seed LightGBM run: the next cell iterates over seeds_seq\n",
    "# and writes each value into lgb_params['seed'].\n",
    "# Active choice: ten fixed consecutive seeds, 2010 through 2019.\n",
    "# Disabled alternative, kept for reference: draw 10 seeds at random from 0-99.\n",
    "# import random\n",
    "# random.seed(RANDOM_SEED)\n",
    "# seeds_seq = random.sample(list(range(100)), 10)\n",
    "seeds_seq = range(2010, 2020)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2010}\n",
      "[0.00115705 0.14284598 0.37893153 ... 0.00009407 0.14053386 0.26256383]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2011}\n",
      "[0.00109148 0.14668181 0.35703726 ... 0.00009453 0.16643379 0.28724285]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2012}\n",
      "[0.00113004 0.14370029 0.42393051 ... 0.00009635 0.16578678 0.27052554]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2013}\n",
      "[0.00126328 0.13591024 0.44200624 ... 0.00009148 0.13064874 0.24967315]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2014}\n",
      "[0.00113164 0.14382097 0.39275264 ... 0.00008165 0.15953555 0.21440612]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2015}\n",
      "[0.00131401 0.13486619 0.37380644 ... 0.00009903 0.16133133 0.25842165]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2016}\n",
      "[0.00122934 0.14541416 0.41834715 ... 0.00008225 0.13190579 0.25482972]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2017}\n",
      "[0.00133667 0.15002731 0.4248672  ... 0.00009829 0.14282908 0.23517834]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2018}\n",
      "[0.0013278  0.16335245 0.40519865 ... 0.00009725 0.13799441 0.28680177]\n",
      "{'objective': 'binary', 'metric': 'binary_logloss', 'boosting': 'gbdt', 'device': 'cpu', 'feature_fraction': 0.486, 'num_leaves': 158, 'lambda_l2': 50, 'learning_rate': 0.01, 'verbose': 1, 'seed': 2019}\n",
      "[0.00114182 0.13879484 0.41536677 ... 0.00008915 0.14413654 0.27203425]\n",
      "CPU times: user 23h 14min 20s, sys: 8min 23s, total: 23h 22min 43s\n",
      "Wall time: 6h 24min 9s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "lgb_models = []\n",
    "lgb_preds = []\n",
    "lgb_params = {\n",
    "        'objective': 'binary',\n",
    "        'metric': 'binary_logloss',\n",
    "        'boosting': 'gbdt',\n",
    "        'device': 'cpu',\n",
    "        'feature_fraction': 0.486,\n",
    "        'num_leaves': 158,\n",
    "        'lambda_l2': 50,\n",
    "        'learning_rate': 0.01,\n",
    "        #'num_boost_round': 5000,\n",
    "        #'early_stopping_rounds': 10,\n",
    "        'verbose': 1\n",
    "    }\n",
    "\n",
    "train_data = lgb.Dataset(data = X_train, label = y_train, feature_name = df_train.columns.tolist())\n",
    "for i, seed in enumerate(seeds_seq):\n",
    "    lgb_params['seed'] = seed\n",
    "    #lgb_params['bagging_fraction_seed'] = seed\n",
    "    #lgb_params['feature_fraction_seed'] = seed\n",
    "    print(lgb_params)\n",
    "    lgb_clf = lgb.train(lgb_params, train_data,  num_boost_round=3600, verbose_eval=100)\n",
    "    lgb_preds.append(lgb_clf.predict(X_test).reshape(-1))\n",
    "    print(lgb_preds[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.00121231, 0.14454142, 0.40322444, ..., 0.00009241, 0.14811359,\n",
       "       0.25916772])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_stacking_lgb = np.mean(lgb_preds, axis=0)\n",
    "y_test_stacking_lgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Stacking with my own codes (commented out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from sklearn.model_selection import StratifiedKFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# kfold = StratifiedKFold(\n",
    "#     n_splits=NUM_FOLDS,\n",
    "#     shuffle=True,\n",
    "#     random_state=RANDOM_SEED\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# class SklearnHelper(object):\n",
    "#     def __init__(self, clf, seed=0, params=None):\n",
    "#         params['random_state'] = seed\n",
    "#         self.clf = clf(**params)\n",
    "\n",
    "#     def train(self, x_train, y_train):\n",
    "#         self.clf.fit(x_train, y_train)\n",
    "\n",
    "#     def predict(self, x):\n",
    "#         return self.clf.predict(x)\n",
    "    \n",
    "#     def predict_proba(self, x):\n",
    "#         return self.clf.predict_proba(x)[:,1]\n",
    "    \n",
    "#     def fit(self,x,y):\n",
    "#         return self.clf.fit(x,y)\n",
    "    \n",
    "#     def feature_importances(self,x,y):\n",
    "#         print(self.clf.fit(x,y).feature_importances_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# class XGBWrapper(object):\n",
    "#     def __init__(self, seed=0, params=None):\n",
    "#         self.param = params\n",
    "#         self.param['seed'] = seed\n",
    "#         self.nrounds = params.pop('num_boost_round', 250)\n",
    "\n",
    "#     def train(self, x_train, y_train):\n",
    "#         dtrain = xgb.DMatrix(x_train, label=y_train)\n",
    "#         self.gbdt = xgb.train(self.param, dtrain, self.nrounds)\n",
    "\n",
    "#     def predict_proba(self, x_test):\n",
    "#         return self.gbdt.predict(xgb.DMatrix(x_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# class LGBMWrapper(object):\n",
    "#     def __init__(self, seed=0, params=None):\n",
    "#         self.param = params\n",
    "#         self.param['seed'] = seed\n",
    "#         self.nrounds = params.pop('num_boost_round', 250)\n",
    "\n",
    "#     def train(self, x_train, y_train):\n",
    "#         dtrain = lgb.Dataset(x_train, label=y_train)\n",
    "#         self.lgbm = lgb.train(self.param, dtrain, self.nrounds)\n",
    "\n",
    "#     def predict_proba(self, x_test):\n",
    "#         return self.lgbm.predict(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# def get_oof(clf, x_train, y_train, x_test, k_folds):\n",
    "#     n_splits = k_folds.get_n_splits() \n",
    "#     ntrain = x_train.shape[0]\n",
    "#     ntest = x_test.shape[0]\n",
    "#     oof_train = np.zeros((ntrain,))\n",
    "#     oof_test = np.zeros((ntest,))\n",
    "#     oof_test_skf = np.empty((n_splits, ntest))\n",
    "\n",
    "#     for i, (train_index, test_index) in enumerate(k_folds.split(x_train, y_train)):\n",
    "#         x_tr = x_train[train_index]\n",
    "#         y_tr = y_train[train_index]\n",
    "#         x_te = x_train[test_index]\n",
    "\n",
    "#         clf.train(x_tr, y_tr)\n",
    "\n",
    "#         oof_train[test_index] = clf.predict_proba(x_te)\n",
    "#         oof_test_skf[i, :] = clf.predict_proba(x_test)\n",
    "\n",
    "#     oof_test[:] = oof_test_skf.mean(axis=0)\n",
    "#     return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# lr_params = {\n",
    "#              'solver' : 'sag', \n",
    "#              'penalty' : 'l2', \n",
    "#              'C' : 1.2, \n",
    "#              'max_iter' : 10000, \n",
    "#              'n_jobs' : -1, \n",
    "#              'verbose' : 1\n",
    "#         }\n",
    "\n",
    "\n",
    "# rfc_params = {\n",
    "#               'max_features' : 'auto',\n",
    "#               'max_depth' : 10,\n",
    "#               'min_samples_split' : 10,\n",
    "#               'min_samples_leaf' : 20,\n",
    "#               'n_estimators' : 500,\n",
    "#               'n_jobs' : -1, \n",
    "#               'verbose' : 5    \n",
    "#         }\n",
    "\n",
    "# #lr2 = LogisticRegression(random_state=RANDOM_SEED, n_jobs=-1)\n",
    "\n",
    "\n",
    "# lgbm_params = {\n",
    "#         'objective': 'binary',\n",
    "#         'metric': 'binary_logloss',\n",
    "#         'boosting': 'gbdt',\n",
    "#         'device': 'cpu',\n",
    "#         'feature_fraction': 0.486,\n",
    "#         'num_leaves': 158,\n",
    "#         'lambda_l2': 50,\n",
    "#         'learning_rate': 0.01,\n",
    "#         'num_boost_round': 3600,\n",
    "#         #'early_stopping_rounds': 10,\n",
    "#         'verbose': 1,\n",
    "#         'bagging_fraction_seed': RANDOM_SEED,\n",
    "#         'feature_fraction_seed': RANDOM_SEED,\n",
    "#     }\n",
    "\n",
    "\n",
    "# # lgbc2 = LGBMClassifier(n_estimators=3600, num_leaves=158, learning_rate=0.01, n_jobs=-1, \n",
    "# #                          reg_lambda=50, colsample_bytree=0.486, \n",
    "# #                         objective='binary', metric='binary_logloss', random_state=RANDOM_SEED, verbose=1)\n",
    "\n",
    "# xgbt_params={\n",
    "#     'max_depth':8,\n",
    "#     'nthread':14,\n",
    "#     'eta':0.05,\n",
    "#     'eval_metric':'logloss',\n",
    "#     'objective':'binary:logistic',\n",
    "#     'subsample':0.8,\n",
    "#     'colsample_bytree':0.7,\n",
    "#     'silent':1,\n",
    "#     #'seed':1123,\n",
    "#     'gamma':0.005,\n",
    "#     'num_boost_round': 640,\n",
    "#     'min_child_weight':1\n",
    "#     #'scale_pos_weight':0.3692\n",
    "# }\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# lr = SklearnHelper(clf=LogisticRegression, seed=RANDOM_SEED, params=lr_params)\n",
    "# rfc = SklearnHelper(clf=RandomForestClassifier, seed=RANDOM_SEED, params=rfc_params)\n",
    "# xgbc = XGBWrapper(seed=RANDOM_SEED, params=xgbt_params)\n",
    "# lgbc = LGBMWrapper(seed=RANDOM_SEED, params=lgbm_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Create our OOF train and test predictions. These base results will be used as new features\n",
    "# kfold = StratifiedKFold(\n",
    "#     n_splits=NUM_FOLDS,\n",
    "#     shuffle=True,\n",
    "#     random_state=RANDOM_SEED\n",
    "# )\n",
    "\n",
    "# lr_oof_train, lr_oof_test = get_oof(lr, X_train_std, y_train, X_test_std, kfold) # Logistic Regression\n",
    "# rfc_oof_train, rfc_oof_test = get_oof(rfc, X_train, y_train, X_test, kfold) # Random Forest\n",
    "# xgb_oof_train, xgb_oof_test = get_oof(xgbc, X_train, y_train, X_test, kfold) # Xgboost\n",
    "# lgb_oof_train, lgb_oof_test = get_oof(lgbc, X_train, y_train, X_test, kfold) # LightGBM\n",
    "\n",
    "# print(\"Training is complete\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# x_2nd_train = np.concatenate(( lr_oof_train, rfc_oof_train, xgb_oof_train, lgb_oof_train), axis=1)\n",
    "# x_2nd_test = np.concatenate(( lr_oof_test, rfc_oof_test, xgb_oof_test, lgb_oof_test), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# lgbm_params_2 = {\n",
    "#         'objective': 'binary',\n",
    "#         'metric': 'binary_logloss',\n",
    "#         'boosting': 'gbdt',\n",
    "#         'device': 'cpu',\n",
    "#         'feature_fraction': 0.486,\n",
    "#         'num_leaves': 158,\n",
    "#         'lambda_l2': 50,\n",
    "#         'learning_rate': 0.01,\n",
    "#         #'num_boost_round': 1000,\n",
    "#         #'early_stopping_rounds': 10,\n",
    "#         'verbose': 1,\n",
    "#         'bagging_fraction_seed': RANDOM_SEED,\n",
    "#         'feature_fraction_seed': RANDOM_SEED,\n",
    "#     }\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train_2nd_data = lgb.Dataset(data = x_2nd_train, label = y_train)\n",
    "\n",
    "# lgb.cv(lgbm_params_2, train_2nd_data,  num_boost_round=30000, early_stopping_rounds=10, verbose_eval=100)\n",
    "# #iters=12000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# lgbm2 = lgb.train(lgbm_params_2, train_2nd_data,  num_boost_round=12000, verbose_eval=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_test = lgbm2.predict(x_2nd_test).reshape(-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate submissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "use_lgb_only = True ## use lgm model only or stacking model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "if use_lgb_only:\n",
    "    y_test = y_test_lgb\n",
    "else:\n",
    "    #y_test = y_test_stacking_lgb\n",
    "    y_test = y_test_stacking_diverse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "submission_id = datetime.datetime.now().strftime('%Y-%m-%d-%H%M')\n",
    "df_submission = pd.DataFrame({\n",
    "    'test_id': range(len(y_test)),\n",
    "    'is_duplicate': y_test\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recalibrate_prediction(pred, train_pos_ratio=0.3692, test_pos_ratio=0.165):\n",
    "    a = test_pos_ratio / train_pos_ratio\n",
    "    b = (1 - test_pos_ratio) / (1 - train_pos_ratio)\n",
    "    return a * pred / (a * pred + b * (1 - pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_submission['is_duplicate'] = df_submission['is_duplicate'].map(recalibrate_prediction)\n",
    "df_submission = df_submission[['test_id', 'is_duplicate']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x10b86f278>"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdIAAAEPCAYAAAD2wEXHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHjtJREFUeJzt3X28HVV56PHfE4hJKOQECNUEWqj4ghYVbimvirRirVIoVrB+qCAqxWtvLVBb8R1bqkKhF+yLvSrWiF6vYFCkIuoVBAWFqBAsVYpQQSEpkuA5SYCEkPP0j5kdNif7nCQzezOzT37fz2c+c86aNWuedd6eMzNr1kRmIkmSqpnRdACSJA0zE6kkSTWYSCVJqsFEKklSDSZSSZJqMJFKklSDiVSSpBpMpJIk1WAilSSpBhOpJEk1mEglSarBRCpJUg0mUkmSajCRSpJUw/ZNB6D+iIjHKP4xWtV0LJI0ROYC45lZOR+G7yOdHiJiHIiRkZGmQ5GkoTE2NgaQmVn5Cq1npNPHqpGRkZHR0dGm45CkoTFv3jzGxsZqXcnzHqkkSTWYSCVJqsFEKklSDSZSSZJqMJFKklSDo3YlaZrITFasWMHatWsZHx9vOpzGzJgxg+233565c+fyS7/0SwM/nolUkqaBzOS+++5j9erVzJo1i+22267pkBqzfv16Hn74YUZHR9lpp51YuHAhM2YM7gKsiVSSpoEVK1awevVqnvrUp7LLLrs0HU7jxsfHWblyJStWrGBsbIydd955YMcykQqAvd5+ZWPHvvucoxo7tjRdrF27llmzZplESzNmzGD+/PmsWrWKNWvWDDSROthIkqaB8fHxbfpybi8Rwfbbbz/w+8UmUkmSajCRSpJUg4lUkqQaTKSSJNVgIpUktd66des488wzWbhwIXPmzOHggw/m6quvbjoswMdfJGnaa/Lxto66j7mdfPLJXHbZZZx++uk84xnPYNGiRbz85S/nuuuu45BDDulTlNWYSCVJrbZkyRI++9nPcsEFF3D66acDcNJJJ7Hvvvty5pln8s1vfrPR+Ly0K0lqtcWLFzNz5kxOOeWUjWWzZ8/mjW98I9dffz3Lly9vMDoTqSSp5W655Rb22WcfdtxxxyeUH3jggWQmS5cubSiygolUktRqy5cvZ8GCBZuUd8qWLVv2ZIf0BI0m0ohYEBHnRMQ3ImJ1RGREHNGj3t3ltonLOT3qzouIj0bEAxHxUERcExH7TXL8YyLi5ohYGxE/jYizImKT+8ZNtylJ27JHHnmEWbNmbVI+e/bsjdub1PRgo2cDZwJ3Aj8ADp2i7veBCyeU3db9SUTMAK4EngecD6wE/gS4NiJ+IzPv6qr7cuBy4BrgLeU+7wXml5+3ok1J2tbNmTOHdevWbVK+du3ajdub1HQi/T4wPzNXRsSxwBemqHtvZn56M+0dR5GMX5mZlwNExKXAHcBZwElddc8HbgFelpkbyrqrgHdExN9n5o9b0qYkbdMWLFjQc0BRp2zhwoVPdkhP0Oil3cxcnZkrt7R+RMyKiB2mqHIcsAz4YtcxHgAuBY6NiJllO88Fngt8pJPwSh+m+Jq8qg1tSpJgv/324/bbb2fNmjVPKL/pppsAeMELXtBEWBsN02Cj3wEeAh6KiLsi4tQedfYHvp+ZOaF8CbAT8IyuegDf666UmcuAe7u2N92mJG3zjjvuONavX89FF120sWzdunV84hOf4LDDDmv8jLTpS7tb6gfAtygufe4G/DHwkYjYJTO7BxwtoLg/OVHnmsBC4Edlve7yiXW7vytNtrlRRIz2qN9tZDPbJWkoHXTQQRx//PG87W1vY/ny5ey999588pOf5J577mHRokVNhzcciTQzj+n+PCI+AVwPvCci/jkzx8pNc4BN70jD2q7t3evJ6nZfPm6yTUkScPHFF/Oe97yHiy++mF/84hc8//nP58tf/jKHHXZY06EN
RyKdKDM3RMSFwGeBQ4CvlJseATYdIw2zu7Z3ryer2z2Wusk2N8rMeT3qb1SesXpWKmkTdee5bYPZs2dz3nnncd555zUdyiaG6R7pRD8r17t0lS3n8Uus3Tply7rqMUXd7qd7m2xTktRyw5xIn16uH+gqWwr8RkTEhLoHAWsonlft1AM4oLtSRCwE9uja3nSbkqSWa30ijYhdygkMustmA38JrAa+07VpMcVAnd/vqjsfOB74YmauB8jMfwduB06NiO269n8zMA5c1oY2JUnt1/g90oh4d/nhc8r1iRHxQmA0M/8ROAZ4V0QsBu4GdgVeBzwLeHNmdj9YtBi4Ebg4Is4HVlDMGDQDeN+EQ/8lcAXw1Yi4BNgX+FOK50DvaFGbkqQWazyRAmdP+PwN5foe4B+Bf6M40zuR4tGXdcDNwFsz80vdO5aDkF4BnAf8GcXo1yXASZl554S6X4qIP6CYSegfKC4R/83EeJpuU5LUbrHpnAAaRhExOjIyMjI6urnHTXvb6+1X9jmiLTcdRhRKTbvnnnsA2HPPPRuOpF0293WZN28eY2NjY5t7MmIqrb9HKknavBkzZrBhw4bNV9yGZCaPPfYYM2YMNtWZSCVpGpg9ezbr1q3jwQcfbDqUVhgfH+eBBx7g0Ucf3eSF4P3WhnukkqSa5s+fz7p167j//vsZHR1lu+222/xO09SGDRtYv3494+PjzJ07l5GRwc5VYyKVpGkgIth9991ZsWIFa9euZXx8vOmQGjNz5kzmzJnDyMgIO+ww1QvD+sNEKknTRESw2267NR3GNsd7pJIk1WAilSSpBhOpJEk1mEglSarBRCpJUg0mUkmSajCRSpJUg4lUkqQaTKSSJNVgIpUkqQYTqSRJNZhIJUmqwUQqSVINJlJJkmowkUqSVIOJVJKkGkykkiTVYCKVJKkGE6kkSTWYSCVJqsFEKklSDSZSSZJqMJFKklSDiVSSpBq2OpFGxI8i4q0RsdsgApIkaZhUOSMN4Dzg3ohYHBG/GxHR57gkSRoKW51IM3Mf4HDgM8DLgCuBeyLiryJizz7HJ0lSq1W6R5qZ12fm64EFwP8ElgHvAe6KiK9FxKsjYmYf45QkqZVqDTbKzDWZ+bHMPBjYF7gUOBL4f8CyiDg/IvboQ5ySJLVS7VG7ETEjIo4GPgAcXxZ/C7gZOAO4PSJ+r+5xJElqo8qJNCKeGREfBH4GfBE4FPgQsE9mHpGZLwOeC9wJnN+PYCVJapvtt3aHiHgd8AbghWXRtcCfA5/PzPXddTPzPyLiQuBjNeOUJKmVtjqRAp8AHqA4y/xYZt65mfo/Aj5b4TiSJLVelUT6auCLE88+J5OZNwE3VTiOJEmtt9WJNDMXDyIQSZKGUZUpAt8bEUun2H5LRLyjXliSJA2HKqN2X0UxwGgy36C4/CtJ0rRXJZH+GsUAosn8B/D0auFIkjRcqk5aP2+K7SPAdtXCkSRpuFRJpD8Ejp5i+9EUZ6WSJE17VRLpvwCHRsTHI2KXTmFE7BIRF1HMcPQv/QpQkqQ2q/L4y0ci4reA1wOvi4h7y017UCTmxZn5T32MUZKk1qr6GrXXAK8FvgqsK5ergBMy0xG7kqRtRpWZjQDIzM9QvNxbkqRtVu3XqNUREQsi4pyI+EZErI6IjIgjJql7TETcHBFrI+KnEXFWRGzyj0BEzIuIj0bEAxHxUERcExH7DXObkqT2qnRGGhE7AH8IPBPYleKRmG6ZmW/agqaeDZxJ8aq1H1AMVOp1vJcDlwPXAG8Bnge8F5hfft6pNwO4stx+PrAS+BPg2oj4jcy8a9jalCS1W5XXqB0AfAnYjU0TaEcCW5JIvw/Mz8yVEXEs8IVJ6p0P3AK8LDM3lHGsAt4REX+fmT8u6x1HkYxfmZmXl/UuBe4AzgJOGsI2JUktVuXS7gXAHOCPgKcBM3ssT9mShjJzdWaunKpORDyX4gXhH+kkp9KHKeJ/VVfZccAyiheNd47xAHApcGxEzBym
NiVJ7VclkR4A/F1mfjYzf56ZG3otfYxx/3L9ve7CzFwG3Nu1vVP3+5mZE9pYAuwEPGPI2pQktVyVRLqa4sXeT5YF5Xp5j23LgYUT6k5Wj666w9LmRhExOtVCMTWjJOlJViWRXg78Tr8DmcKccr2ux7a1Xds7dSer193WsLQpSWq5Kon0bcDuEXFBROzZ74B6eKRcz+qxbXbX9k7dyep1tzUsbW6UmfOmWoCxHu1JkgasSiJ9gOIe358B/xkR6yPi0QlLr7OtqjqXOxf02LaAYtBOd93J6tFVd1jalCS1XJXnSC+heLzlybK0XB8A3NwpjIiFFPP7Lp1Q99CIiAkDeQ4C1lA8rzpMbUqSWq7KpPWvHUQgUxzv3yPiduDUiPh414jgNwPjwGVd1RdTPFry+xT3comI+cDxwBczc/0wtSlJar/Kc+32S0S8u/zwOeX6xIh4ITCamf9Ylv0lcAXw1Yi4BNgX+FOKZzbv6GpuMXAjcHFEnA+soJgxaAbwvgmHHpY2JUktFps+yrgFOxVT3L2GYvTuU4G3Z+atETEPeAVwbfn85Ja0NVkA92TmXl31jqWY9ec5FPdp/wU4OzMfm9DezsB5wLEUo1+XAG/NzJuZYFja3BIRMToyMjIyOjpaZXf2evuVlfbrh7vPOaqxY0vats2bN4+xsbGxctBmJVudSCNiDvAV4EUUj2vMAl6amdeUk7PfC3w0M99bNShtPROpJG29fiTSKqN23wccTHE/by+65tstz7o+D/xu1YAkSRomVRLp8RRnnJcBvaYC/DFFgpUkadqrkkh3B26dYvtDwNxq4UiSNFyqJNIH6T2ZQMdz6T2PrCRJ006VRHoN8Ppy0NETlFMGvgH4at3AJEkaBlUS6V8Bu1I8rnEqxSxHL42Isylm9FkPfKBvEUqS1GJbnUjLiQVeSjFa9/3l+kzgXcB/UTwK89N+BilJUltVmtkoM5cA+0bEfhQTDwTFaN3v9XhZtSRJ01atKQIzcylPnIxdkqRtSpV7pJIkqbTVZ6QRsZ7Nv0YtM7PXi6slSZpW+vU+0u2BvSnexfkD4N9qxiVJ0lDo6/tII+Jwirl2T60TlCRJw6Kv90gz85vAIuBv+9muJEltNYjBRndQXOKVJGnaG0Qi7bynVJKkaa/KqN0TJtm0C3AkcDTwiTpBSZI0LKqM2v00xajd6LFtA/BJ4Iw6QUmSNCyqJNKX9ihLiter/WdmrqoXkiRJw6PK4y9XDyIQSZKGkVMESpJUQ5XBRh+tcJzMzDdV2E+SpFarco/0FB6fInDigKOpyk2kkqRpp8ql3YUUr077EnA4ML9cXgxcCdwCLABmdi1P6UewkiS1TZVEeg6wIjN/PzOvz8wHy+VbmXkMxejdczNzQ/fS37AlSWqHKon094Arptj+xbKOJEnTXpVEOpvi8u5kdi/rSJI07VVJpN8G3hIRh07cEBGHAW8p60iSNO1VGbX758C3gG9FxI3A7WX5PsDBwGrgrf0JT5Kkdqsys9FtEXEA8EHgKOCQctMjwGXAOzPzzv6FKElSe1U5IyUz7wJeHRHbAU8ri//L0bmSpG1NpUTaUSbO+/oUiyRJQ6fSXLsRsWNEvDMiro2IH0XEwWX5/LL8Wf0NU5Kkdqoy1+6uwPXAM4GfAE8HdgDIzBURcQrFS77/oo9xSpLUSlUu7f4NxbOih1Ak0p9P2H45cGTNuCRJGgpVLu0eDXw4M7/L45PUd/sJ8Cu1opIkaUhUSaS7AT+eYvtjlJd6JUma7qok0vsp7otOZn/gp9XCkSRpuFRJpF8G3hgRT524oZyo4SSmntRekqRpo0oi/WuKe6O3AGeXH782Ij5FMZr3fopXrUmSNO1tdSLNzGXAoRSJ9E1AACcDJwDfAF6UmSv7GKMkSa1VdYrAnwBHRcQuwLMpkumdmTnxURhJkqa1rUqkEbEj8L+Br2Xm4sx8EPjOQCKTJGkIbNWl3cxcA5wIjAwmHEmShkuVwUY/BPbsdyCS
JA2jKon0PODNEbF3v4ORJGnYVBls9HTgXuC2iLiCYpajhyfUycz8YN3gJElqu6qT1nccP0mdBEykkqRpr0oifWbfo5AkaUht0T3SiDiwfGaUzLxrS5Z+BhkRR0RETrLsM6HuoRFxfUQ8HBH/FREfiohNJtGPiFkRcW5ELIuIRyLixoh4ySTHb6xNSVK7belgo+8Av9v5JCJ2jIjPRMRzBxPWpC6kePyme1nWFdd+wNXAbODPgYsoZl+6pEdbi4AzgE8DpwHjwFURcUh3pRa0KUlqsS29tBsTPp8FvIYiAfywrxFN7brMvHyK7R8AVgJHlM+8EhF3Ax+LiN/OzGvKsgMp4j8jMy8syy4GbgPOBQ5vQ5uSpPar8vhLoyJip4jY5B+AiJgLvBS4uJOcShcDa4BXd5UdB6yn+EcAgMxcC3wceGFELGhJm5Kklhu2RPopYBXwSER8LSKe17XteRRn2N/r3iEzHwWWUrwntWN/4PYJiQxgCcXZ934taVOS1HKVJq1vwKPAYuAqYAXwfOAvgOsj4jcz8w5gQVl3eY/9lwPd9ykXAPdNUg9gYVe9JtvcKCJGe5V3cdpGSWrA1iTSV0TE08qPd6B4VvT4cuDMRJmZF9SO7vHGvg18u6voioj4V4qzurOAPwLmlNvW9Whibdd2yo8nq0dX3abblCS13NYk0hPKpdubJqmbQN8Sac8DZN4aEV8HOo+XPFKuZ/WoPrtre6fuZPW622q6zY0yc16v8o7yjNWzUkl6km1pIv2tgUZR3c94PJF2LpUu6FFvAV2PyZR1J6tHV92m25QktdwWJdLMvG7QgVT0dOCB8uPbgMeAA4DPdypExFMoBvp8pmu/pcBpEbHjhMFBB5XrW1vSpiSp5YZi1G5E7Naj7IUUZ8pfBcjMMeDrwInlC8g7TgR2BD7XVbYYmAmc0tXeLOD1wA2ZuawlbUqSWm5YRu1eEhEPUww4WgHsC5xafvy+rnrvKutcGxEXAXsAbwWuysyvdypl5k0R8Tngb8vnO+8CXkfxntWTJxy7sTYlSe03FGekwOXAbhTJ5p+AV1FcAv3NzPxpp1Jm3gwcSTEi9gLgj4GP0fstNScBHyrXf09xNvmKzLyhu1IL2pQktVhkZtMxqA8iYnRkZGRkdHRzj5v2ttfbr+xzRFvu7nOOauzYkrZt8+bNY2xsbGxzT0ZMZVjOSCVJaiUTqSRJNZhIJUmqwUQqSVINJlJJkmowkUqSVIOJVJKkGkykkiTVYCKVJKkGE6kkSTWYSCVJqsFEKklSDSZSSZJqMJFKklSDiVSSpBpMpJIk1WAilSSpBhOpJEk1mEglSarBRCpJUg0mUkmSajCRSpJUg4lUkqQaTKSSJNVgIpUkqQYTqSRJNZhIJUmqwUQqSVINJlJJkmowkUqSVIOJVJKkGkykkiTVYCKVJKmG7ZsOQNrr7Vc2cty7zzmqkeNKml48I5UkqQYTqSRJNZhIJUmqwUQqSVINJlJJkmowkUqSVIOPv2ib1dRjN+CjN9J04hmpJEk1mEglSarBRCpJUg3eI5Ua0OT92SZ4T1jTmWekkiTVYCKVJKkGL+1KGjgfNdJ0ZiKVNK35mr4nz7b6D5OXdiVJqsEz0oZExCzgr4ETgZ2BW4F3ZebVjQYmqS+21bOzbZFnpM1ZBJwBfBo4DRgHroqIQ5oMSpK0dTwjbUBEHAi8BjgjMy8syy4GbgPOBQ5vMDxJ0lYwkTbjOGA9cFGnIDPXRsTHgfdHxILMXN5YdJKG2rY24UfTTKTN2B+4PTPXTChfAgSwH/CERBoRo5tpc2RsbIx58+ZVCmjV2scq7SdJbTDv/1RLZ2NjYwBz6xzbRNqMBcB9Pco7yXNhxXZzbGxsVYX9Rsr1WMXjDiP7vG2wz9uGkbF1QLU+z6UYo1KZibQZc4B1PcrXdm1/gsysdqq5BTpnu4M8RtvY522Dfd42NN1nR+024xFgVo/y2V3bJUlDwETajOUUl3cn6pQtexJjkSTV
YCJtxlJgn4jYcUL5QeX61ic5HklSRSbSZiwGZgKndArKmY5eD9yQmZ6RStKQcLBRAzLzpoj4HPC3EbEAuAt4HbAncHKTsUmSto6JtDknAWeX652BHwCvyMwbGo1KkrRVIjObjkENa3roeBPs87bBPm8bmu6ziVSSpBocbCRJUg0mUkmSajCRSpJUg4lUkqQaTKTTWETMiohzI2JZRDwSETdGxEu2cN/dI+LSiBiNiFURcXlE/NqgY66rap8j4g8i4pKI+ElEPBwRt0fEeRExsrl9m1bn+zyhnS9HREbEhYOIs5/q9jkiToiIJRHxUEQ8GBHXRcSBg4y5rpq/z0dGxLURsTIifhER34mIVw865joiYkFEnBMR34iI1eXP5hFbsf9zIuIrEbGm/B5/MiLmDyJWE+n0tgg4A/g0cBrFq4KuiohDptqpnLrwG8CLgPcDZwH/A7g2InYeZMB9sIgKfQY+CjwH+BTwZ8BXy/UNETF7qh1bYBHV+rxRRBwFHD6Q6AZjERX7HBF/A3wSuK3c968oJkV52qCC7ZNFVPt9/j3gaxTzBpwFvAfYAFwSEW8cZMA1PRs4E9iD4jn7LRYRewDfBPYG3gmcDxwNfC0iZvY5TshMl2m4AAcCCZzeVTYbuBP45mb2fRvFL+n+XWX7AI8Bf9103wbU5yN6lJ1Utndy030bRJ+76j8FuAN4b9nWhU33a4Df50PLn+1XNt2PJ7HPV1G8/3hWV9mssuy6pvs2Rdw7AbuWHx9b9v+ILdz3w8AaYPeusiPLNt7Q71g9I52+jgPWAxd1CjJzLfBx4IXl1IRT7XtjZt7Ste/twNVAmy8HVe5zZl7bo/gL5fo5fYyx3+p8nztOo3gH7vkDibD/6vT5NOC7mfmFiJjR48URbVWnz3OBX2Tmxncglx//gha/sjEzV2fmyoq7vwq4IjPv62rv6xT/MPb9b5iJdPraH7g9M9dMKF8CBLBfr50iYgbwfOB7PTYvAZ4VETv0M9A+qtTnKXQu9a2oG9gA1epzRDyN4lLfOzPz4cGE2Hd1+vwS4LsR8QFgDFgdEXdHxB8NJtS+qdPn64Bfj4izI2LvcjkbeBbwd4MJtzkRsTvwy0z+N2z/fh/TuXanrwUUl24mWl6uF06y3y4Ul32W99i2nOKXtjPRfttU7fNkzqS4l/T5OkENWN0+fxD4D4r7bsOiUp/L+/u7Aq+h+L6eCTwI/C/g0xHxcGZ+ode+LVDn+/x+inuF7wLeXZatAY7JzP/ftwjbo3N2PtnfsF+OiO0yc0O/Dmginb7mAOt6lK/t2j7ZflTct2lV+7yJiDgBeCPwwcxs4z8NHZX7XI5SPQl4cZY3kYZE1T53LuPuChycmTcBRMQXKO41vpfHL+e3TZ2f7XUUlzQ/R9G/7YBTgUsj4iWZ+d1+BtoCW/o3bOLZfWUm0unrEYozy4lmd22fbD8q7tu0qn1+goh4EcW9pyspLnu2WaU+R0QAHwIuy8zrBxTboNT92f5JJ4lCcb8wIhYDp0XEjj0un7ZBnZ/tf6AYrPSbmTkOEBGXAv8OXAgc1sc42+BJ/xvmPdLpazmPX+Lo1imb7OXhD1L8JzfZvknvSyZtULXPG0XEC4ArKIbb/2E/L/8MSNU+v5Lij+s/R8RenaXcNrf8vK1XHur+bN/fY9v9FLct2vrccKU+R8RTgFOAL3WSKEBmrqcYzXtgREy3E6rO36fJvl4/7/fvtYl0+loK7NNjVOJB5frWXjuVv2z/BhzQY/NBwI9bPCilUp87ImJv4CvAz4GjMvOh/ofYd1X7/KsUv//XAD/pWgBeX3784v6G2jd1fraXArv32LwHxX3TB/sVZJ9V/T7vSnHlcbse22aW26IvEbZEOVL3AXr/DTuQ4mvZVybS6WsxxS/KKZ2CiJhF8UfyhsxcVpb9akTs02PfgyNi/659nw38NsV9lraq3Ody9OrXKJ4xfFlmtnmkbreqff5XirPSiQvAl8qPbx549NXU+dn+
HPArEfHSrn3nUjwS8e3MbOtti6p9/jkwCvxB90QEZUI+GritPDsdWp2RyBOKLwOOKUfwduq9hGKkcv//hjX90K3L4BbgUuBR4FyKwQU3lJ8f1lXn2uLH4An77UQx+GI58BfA6cBPgZ9RPiDd1qVGn5dSXLY+F3jthOWQpvs1iD5P0lbrJ2So+X3eAfgRsIpiRqPTKa7APGHfNi41+vyu8vv6vbK/bwV+WJb9YdP92kyf310u/7eM9+Pl53/aVedu4O4J+/0KxWNrdwBvAd5BcbVhKfCUvsfZ9BfKZXALxY3188qEuJbiGaojJ9Tp+QeW4lLX5yiftaO4b/j0pvs0qD6Xv6STLYua7tegvs892hqWRFrnZ/tpFFNBPkgx6OR64PCm+zTgPp8A3EQxCcPDwI0MwexOU/xO3t1VZ5NEWpb/OsVUnw+V/f4UsNsg4ozygJIkqQLvkUqSVIOJVJKkGkykkiTVYCKVJKkGE6kkSTWYSCVJqsFEKklSDSZSSZJqMJFKklSDiVSSpBr+G1Agz2PeBs4JAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pd.DataFrame(y_test).plot.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test duplicates with >0.9 confidence: 50977\n",
      "Test mean prediction: 0.13191317707235767\n",
      "Calibrated mean prediction: 0.07926094608141294\n"
     ]
    }
   ],
   "source": [
    "print('Test duplicates with >0.9 confidence:', len(df_submission[df_submission.is_duplicate > 0.9]))\n",
    "print('Test mean prediction:', np.mean(y_test))\n",
    "print('Calibrated mean prediction:', df_submission['is_duplicate'].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "#final_cv_score = -scores.mean()\n",
    "#model_clf = \"lgbfull\"\n",
    "#model_clf = 'xgbfull'\n",
    "#model_clf = \"lgb+xgbfull\"\n",
    "#model_clf = \"siamese_lstm\"\n",
    "#model_clf = \"lrfull\"\n",
    "#model_clf = 'rfcfull'\n",
    "#model_clf = 'stackfull'\n",
    "#model_clf = \"stackcv\"\n",
    "model_clf = \"lgbfullsk\"\n",
    "# model_clf = \"stacklgb+xgb\"\n",
    "# model_clf = \"stackwithlgb\"\n",
    "# model_clf = \"stackwithnoprob\"\n",
    "# model_clf = \"stackfinal\"\n",
    "# model_clf = \"lgbfull2\"\n",
    "# model_clf = 'lgbrandom'\n",
    "# model_clf = 'stacking-diverse'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_submission.to_csv(\n",
    "    project.submissions_dir + f'{submission_id}-submission-draft-cv-{model_clf}.csv',\n",
    "    header=True,\n",
    "    float_format='%.8f',\n",
    "    index=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
